diff --git a/alembic/versions/2025_10_04_1541-445d8858b23a_remove_agency_location_columns.py b/alembic/versions/2025_10_04_1541-445d8858b23a_remove_agency_location_columns.py
new file mode 100644
index 00000000..c7d98156
--- /dev/null
+++ b/alembic/versions/2025_10_04_1541-445d8858b23a_remove_agency_location_columns.py
@@ -0,0 +1,42 @@
+"""Remove agency location columns
+
+Revision ID: 445d8858b23a
+Revises: dc6ab5157c49
+Create Date: 2025-10-04 15:41:52.384222
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '445d8858b23a'
+down_revision: Union[str, None] = 'dc6ab5157c49'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+TABLE_NAME = 'agencies'
+
+# Location columns removed by this revision. Each was a nullable string on
+# the Agency model, so downgrade() can re-create the schema (not the data).
+LOCATION_COLUMNS = ('locality', 'state', 'county')
+
+
+def upgrade() -> None:
+    """Drop the locality, state and county columns from the agencies table."""
+    for column_name in LOCATION_COLUMNS:
+        op.drop_column(TABLE_NAME, column_name)
+
+
+def downgrade() -> None:
+    """Re-create the dropped location columns as nullable strings.
+
+    Only the schema is restored; the values held before the upgrade are gone.
+    """
+    for column_name in LOCATION_COLUMNS:
+        op.add_column(
+            TABLE_NAME,
+            sa.Column(column_name, sa.String(), nullable=True),
+        )
diff --git a/src/api/endpoints/review/next/convert.py b/src/api/endpoints/review/next/convert.py
deleted file mode 100644
index 2789895f..00000000
--- a/src/api/endpoints/review/next/convert.py
+++ /dev/null
@@ -1,120 +0,0 @@
-from collections import Counter
-
-from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo, AgencySuggestionAndUserCount
-from src.api.endpoints.review.next.dto import FinalReviewAnnotationAgencyInfo, FinalReviewAnnotationAgencyAutoInfo
-from src.core.enums import SuggestionType
-from src.db.models.impl.agency.sqlalchemy import Agency
-from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency
-from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask
-from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion
-from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion
-
-
-def 
convert_agency_info_to_final_review_annotation_agency_info( - subtasks: list[URLAutoAgencyIDSubtask], - confirmed_agencies: list[LinkURLAgency], - user_agency_suggestions: list[UserUrlAgencySuggestion] -) -> FinalReviewAnnotationAgencyInfo: - - confirmed_agency_info: list[GetNextURLForAgencyAgencyInfo] = ( - _convert_confirmed_agencies_to_final_review_annotation_agency_info( - confirmed_agencies - ) - ) - - agency_auto_info: FinalReviewAnnotationAgencyAutoInfo = ( - _convert_url_auto_agency_suggestions_to_final_review_annotation_agency_auto_info( - subtasks - ) - ) - - agency_user_suggestions: list[AgencySuggestionAndUserCount] = ( - _convert_user_url_agency_suggestion_to_final_review_annotation_agency_user_info( - user_agency_suggestions - ) - ) - - return FinalReviewAnnotationAgencyInfo( - confirmed=confirmed_agency_info, - user=agency_user_suggestions, - auto=agency_auto_info - ) - -def _convert_confirmed_agencies_to_final_review_annotation_agency_info( - confirmed_agencies: list[LinkURLAgency] -) -> list[GetNextURLForAgencyAgencyInfo]: - results: list[GetNextURLForAgencyAgencyInfo] = [] - for confirmed_agency in confirmed_agencies: - agency = confirmed_agency.agency - agency_info = _convert_agency_to_get_next_url_for_agency_agency_info( - suggestion_type=SuggestionType.CONFIRMED, - agency=agency - ) - results.append(agency_info) - return results - -def _convert_user_url_agency_suggestion_to_final_review_annotation_agency_user_info( - user_url_agency_suggestions: list[UserUrlAgencySuggestion] -) -> list[AgencySuggestionAndUserCount]: - agency_id_count: Counter[int] = Counter() - agency_id_to_agency: dict[int, GetNextURLForAgencyAgencyInfo] = {} - for suggestion in user_url_agency_suggestions: - agency_id_count[suggestion.agency_id] += 1 - agency_id_to_agency[suggestion.agency_id] = _convert_agency_to_get_next_url_for_agency_agency_info( - suggestion_type=SuggestionType.USER_SUGGESTION, - agency=suggestion.agency - ) - - suggestions_and_counts: 
list[AgencySuggestionAndUserCount] = [] - for agency_id, count in agency_id_count.items(): - suggestions_and_counts.append( - AgencySuggestionAndUserCount( - suggestion=agency_id_to_agency[agency_id], - user_count=count - ) - ) - - suggestions_and_counts.sort(key=lambda x: x.user_count, reverse=True) - - return suggestions_and_counts - -def _convert_agency_to_get_next_url_for_agency_agency_info( - suggestion_type: SuggestionType, - agency: Agency | None -) -> GetNextURLForAgencyAgencyInfo: - if agency is None: - if suggestion_type == SuggestionType.UNKNOWN: - return GetNextURLForAgencyAgencyInfo( - suggestion_type=suggestion_type, - ) - raise ValueError("agency cannot be None for suggestion type other than unknown") - - return GetNextURLForAgencyAgencyInfo( - suggestion_type=suggestion_type, - pdap_agency_id=agency.agency_id, - agency_name=agency.name, - state=agency.state, - county=agency.county, - locality=agency.locality - ) - -def _convert_url_auto_agency_suggestions_to_final_review_annotation_agency_auto_info( - subtasks: list[URLAutoAgencyIDSubtask] -) -> FinalReviewAnnotationAgencyAutoInfo: - results: list[GetNextURLForAgencyAgencyInfo] = [] - count_agencies_not_found: int = 0 - for subtask in subtasks: - if not subtask.agencies_found: - count_agencies_not_found += 1 - continue - suggestions: list[AgencyIDSubtaskSuggestion] = subtask.suggestions - for suggestion in suggestions: - info: GetNextURLForAgencyAgencyInfo = _convert_agency_to_get_next_url_for_agency_agency_info( - suggestion_type=SuggestionType.AUTO_SUGGESTION, - agency=suggestion.agency - ) - results.append(info) - return FinalReviewAnnotationAgencyAutoInfo( - unknown=count_agencies_not_found == len(subtasks), - suggestions=results - ) diff --git a/src/api/endpoints/review/next/core.py b/src/api/endpoints/review/next/core.py deleted file mode 100644 index d19d4926..00000000 --- a/src/api/endpoints/review/next/core.py +++ /dev/null @@ -1,221 +0,0 @@ -from sqlalchemy import FromClause, select, 
Select, desc, asc, func, CTE -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import joinedload - -from src.api.endpoints.review.next.convert import convert_agency_info_to_final_review_annotation_agency_info -from src.api.endpoints.review.next.dto import FinalReviewOptionalMetadata, FinalReviewBatchInfo, \ - GetNextURLForFinalReviewOuterResponse, GetNextURLForFinalReviewResponse, FinalReviewAnnotationInfo -from src.api.endpoints.review.next.extract import extract_html_content_infos, extract_optional_metadata -from src.api.endpoints.review.next.queries.count_reviewed import COUNT_REVIEWED_CTE -from src.api.endpoints.review.next.queries.eligible_urls import build_eligible_urls_cte -from src.api.endpoints.review.next.templates.count_cte import CountCTE -from src.collectors.enums import URLStatus -from src.core.tasks.url.operators.html.scraper.parser.util import convert_to_response_html_info -from src.db.constants import USER_ANNOTATION_MODELS -from src.db.dto_converter import DTOConverter -from src.db.dtos.url.html_content import URLHTMLContentInfo -from src.db.exceptions import FailedQueryException -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.db.queries.base.builder import QueryBuilderBase -from src.db.queries.implementations.core.common.annotation_exists_.core import AnnotationExistsCTEQueryBuilder - -TOTAL_DISTINCT_ANNOTATION_COUNT_LABEL = "total_distinct_annotation_count" - - -class GetNextURLForFinalReviewQueryBuilder(QueryBuilderBase): - - def __init__(self, batch_id: int | None = None): - 
super().__init__() - self.batch_id = batch_id - self.anno_exists_builder = AnnotationExistsCTEQueryBuilder() - # The below relationships are joined directly to the URL - self.single_join_relationships = [ - URL.html_content, - URL.auto_record_type_suggestion, - URL.auto_relevant_suggestion, - URL.user_relevant_suggestions, - URL.user_record_type_suggestions, - URL.optional_data_source_metadata, - ] - # The below relationships are joined to entities that are joined to the URL - self.double_join_relationships = [ - (URL.user_agency_suggestions, UserUrlAgencySuggestion.agency), - (URL.confirmed_agencies, LinkURLAgency.agency) - ] - - self.count_label = "count" - - def _get_where_exist_clauses( - self, - query: FromClause, - ): - where_clauses = [] - for model in USER_ANNOTATION_MODELS: - label = self.anno_exists_builder.get_exists_label(model) - where_clause = getattr(query.c, label) == 1 - where_clauses.append(where_clause) - return where_clauses - - def _build_base_query(self) -> Select: - eligible_urls: CTE = build_eligible_urls_cte(batch_id=self.batch_id) - - query = ( - select( - URL, - ) - .select_from( - eligible_urls - ) - .join( - URL, - URL.id == eligible_urls.c.url_id - ) - .where( - URL.status == URLStatus.OK.value - ) - ) - return query - - async def _apply_options( - self, - url_query: Select - ): - return url_query.options( - *[ - joinedload(relationship) - for relationship in self.single_join_relationships - ], - *[ - joinedload(primary).joinedload(secondary) - for primary, secondary in self.double_join_relationships - ], - joinedload(URL.auto_agency_subtasks) - .joinedload(URLAutoAgencyIDSubtask.suggestions) - .contains_eager(AgencyIDSubtaskSuggestion.agency) - ) - - - async def get_batch_info(self, session: AsyncSession) -> FinalReviewBatchInfo | None: - if self.batch_id is None: - return None - - count_reviewed_query: CountCTE = COUNT_REVIEWED_CTE - - count_ready_query = await self.get_count_ready_query() - - full_query = ( - select( - 
func.coalesce(count_reviewed_query.count, 0).label("count_reviewed"), - func.coalesce(count_ready_query.c[self.count_label], 0).label("count_ready_for_review") - ) - .select_from( - count_ready_query.outerjoin( - count_reviewed_query.cte, - count_reviewed_query.batch_id == count_ready_query.c.batch_id - ) - ) - ) - - raw_result = await session.execute(full_query) - return FinalReviewBatchInfo(**raw_result.mappings().one()) - - async def get_count_ready_query(self): - # TODO: Migrate to separate query builder - builder = self.anno_exists_builder - count_ready_query = ( - select( - LinkBatchURL.batch_id, - func.count(URL.id).label(self.count_label) - ) - .select_from(LinkBatchURL) - .join(URL) - .join( - builder.query, - builder.url_id == URL.id - ) - .where( - LinkBatchURL.batch_id == self.batch_id, - URL.status == URLStatus.OK.value, - *self._get_where_exist_clauses( - builder.query - ) - ) - .group_by(LinkBatchURL.batch_id) - .subquery("count_ready") - ) - return count_ready_query - - async def run( - self, - session: AsyncSession - ) -> GetNextURLForFinalReviewOuterResponse: - await self.anno_exists_builder.build() - - url_query = await self.build_url_query() - - raw_result = await session.execute(url_query.limit(1)) - row = raw_result.unique().first() - - if row is None: - return GetNextURLForFinalReviewOuterResponse( - next_source=None, - remaining=0 - ) - - count_query = ( - select( - func.count() - ).select_from(url_query.subquery("count")) - ) - remaining_result = (await session.execute(count_query)).scalar() - - - result: URL = row[0] - - html_content_infos: list[URLHTMLContentInfo] = await extract_html_content_infos(result) - optional_metadata: FinalReviewOptionalMetadata = await extract_optional_metadata(result) - - batch_info = await self.get_batch_info(session) - try: - - next_source = GetNextURLForFinalReviewResponse( - id=result.id, - url=result.url, - html_info=convert_to_response_html_info(html_content_infos), - name=result.name, - 
description=result.description, - annotations=FinalReviewAnnotationInfo( - relevant=DTOConverter.final_review_annotation_relevant_info( - user_suggestions=result.user_relevant_suggestions, - auto_suggestion=result.auto_relevant_suggestion - ), - record_type=DTOConverter.final_review_annotation_record_type_info( - user_suggestions=result.user_record_type_suggestions, - auto_suggestion=result.auto_record_type_suggestion - ), - agency=convert_agency_info_to_final_review_annotation_agency_info( - subtasks=result.auto_agency_subtasks, - user_agency_suggestions=result.user_agency_suggestions, - confirmed_agencies=result.confirmed_agencies - ) - ), - optional_metadata=optional_metadata, - batch_info=batch_info - ) - return GetNextURLForFinalReviewOuterResponse( - next_source=next_source, - remaining=remaining_result - ) - except Exception as e: - raise FailedQueryException(f"Failed to convert result for url id {result.id} to response") from e - - async def build_url_query(self): - url_query = self._build_base_query() - url_query = await self._apply_options(url_query) - - return url_query diff --git a/src/api/endpoints/review/next/extract.py b/src/api/endpoints/review/next/extract.py deleted file mode 100644 index aca642e0..00000000 --- a/src/api/endpoints/review/next/extract.py +++ /dev/null @@ -1,23 +0,0 @@ -from src.api.endpoints.review.next.dto import FinalReviewOptionalMetadata -from src.db.dtos.url.html_content import URLHTMLContentInfo -from src.db.models.impl.url.core.sqlalchemy import URL - - -async def extract_html_content_infos( - url: URL -)-> list[URLHTMLContentInfo]: - html_content = url.html_content - html_content_infos = [ - URLHTMLContentInfo(**html_info.__dict__) - for html_info in html_content - ] - return html_content_infos - -async def extract_optional_metadata(url: URL) -> FinalReviewOptionalMetadata: - if url.optional_data_source_metadata is None: - return FinalReviewOptionalMetadata() - return FinalReviewOptionalMetadata( - 
record_formats=url.optional_data_source_metadata.record_formats, - data_portal_type=url.optional_data_source_metadata.data_portal_type, - supplying_entity=url.optional_data_source_metadata.supplying_entity - ) \ No newline at end of file diff --git a/src/api/endpoints/review/next/queries/__init__.py b/src/api/endpoints/review/next/queries/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/api/endpoints/review/next/queries/count_reviewed.py b/src/api/endpoints/review/next/queries/count_reviewed.py deleted file mode 100644 index 91349cb5..00000000 --- a/src/api/endpoints/review/next/queries/count_reviewed.py +++ /dev/null @@ -1,18 +0,0 @@ -from sqlalchemy import select, func - -from src.api.endpoints.review.next.templates.count_cte import CountCTE -from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL - -COUNT_REVIEWED_CTE: CountCTE = CountCTE( - select( - Batch.id.label("batch_id"), - func.count(FlagURLValidated.url_id).label("count") - ) - .select_from(Batch) - .join(LinkBatchURL) - .outerjoin(FlagURLValidated, FlagURLValidated.url_id == LinkBatchURL.url_id) - .group_by(Batch.id) - .cte("count_reviewed") -) \ No newline at end of file diff --git a/src/api/endpoints/review/next/queries/eligible_urls.py b/src/api/endpoints/review/next/queries/eligible_urls.py deleted file mode 100644 index bee5cea2..00000000 --- a/src/api/endpoints/review/next/queries/eligible_urls.py +++ /dev/null @@ -1,35 +0,0 @@ -from sqlalchemy import CTE, select, Select - -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView - -uafw = URLAnnotationFlagsView - -def build_eligible_urls_cte(batch_id: int | None = None) -> CTE: - query: Select = ( - select( - uafw.url_id, - ) - .where( - # uafw.has_auto_agency_suggestion.is_(True), 
- # uafw.has_auto_record_type_suggestion.is_(True), - # uafw.has_auto_relevant_suggestion.is_(True), - uafw.has_user_relevant_suggestion.is_(True), - uafw.has_user_agency_suggestion.is_(True), - uafw.has_user_record_type_suggestion.is_(True), - uafw.was_reviewed.is_(False) - ) - ) - - if batch_id is not None: - query = ( - query.join( - LinkBatchURL, - LinkBatchURL.url_id == uafw.url_id - ) - .where( - LinkBatchURL.batch_id == batch_id - ) - ) - - return query.cte("eligible_urls") diff --git a/src/api/endpoints/review/next/templates/__init__.py b/src/api/endpoints/review/next/templates/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/api/endpoints/review/next/templates/count_cte.py b/src/api/endpoints/review/next/templates/count_cte.py deleted file mode 100644 index 0abbbab4..00000000 --- a/src/api/endpoints/review/next/templates/count_cte.py +++ /dev/null @@ -1,15 +0,0 @@ -from sqlalchemy import CTE, Column - - -class CountCTE: - - def __init__(self, cte: CTE): - self.cte = cte - - @property - def batch_id(self) -> Column[int]: - return self.cte.c['batch_id'] - - @property - def count(self) -> Column[int]: - return self.cte.c['count'] \ No newline at end of file diff --git a/src/api/endpoints/review/routes.py b/src/api/endpoints/review/routes.py deleted file mode 100644 index c2ceada9..00000000 --- a/src/api/endpoints/review/routes.py +++ /dev/null @@ -1,59 +0,0 @@ -from fastapi import APIRouter, Depends, Query - -from src.api.dependencies import get_async_core -from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo -from src.api.endpoints.review.next.dto import GetNextURLForFinalReviewOuterResponse -from src.api.endpoints.review.reject.dto import FinalReviewRejectionInfo -from src.core.core import AsyncCore -from src.security.dtos.access_info import AccessInfo -from src.security.enums import Permissions -from src.security.manager import require_permission - -review_router = APIRouter( - prefix="/review", - 
tags=["Review"], - responses={404: {"description": "Not found"}}, -) - -requires_final_review_permission = require_permission(Permissions.SOURCE_COLLECTOR_FINAL_REVIEW) - -batch_id_query = Query( - description="The batch id of the next URL to get. " - "If not specified, defaults to first qualifying URL", - default=None -) - -@review_router.get("/next-source") -async def get_next_source( - core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(requires_final_review_permission), - batch_id: int | None = batch_id_query, -) -> GetNextURLForFinalReviewOuterResponse: - return await core.get_next_source_for_review(batch_id=batch_id) - -@review_router.post("/approve-source") -async def approve_source( - core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(requires_final_review_permission), - approval_info: FinalReviewApprovalInfo = FinalReviewApprovalInfo, - batch_id: int | None = batch_id_query, -) -> GetNextURLForFinalReviewOuterResponse: - await core.approve_url( - approval_info, - access_info=access_info, - ) - return await core.get_next_source_for_review(batch_id=batch_id) - -@review_router.post("/reject-source") -async def reject_source( - core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(requires_final_review_permission), - review_info: FinalReviewRejectionInfo = FinalReviewRejectionInfo, - batch_id: int | None = batch_id_query, -) -> GetNextURLForFinalReviewOuterResponse: - await core.reject_url( - url_id=review_info.url_id, - access_info=access_info, - rejection_reason=review_info.rejection_reason - ) - return await core.get_next_source_for_review(batch_id=batch_id) diff --git a/src/api/main.py b/src/api/main.py index 1eb0a22b..d1097de3 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -11,7 +11,6 @@ from src.api.endpoints.batch.routes import batch_router from src.api.endpoints.collector.routes import collector_router from src.api.endpoints.metrics.routes import metrics_router -from 
src.api.endpoints.review.routes import review_router from src.api.endpoints.root import root_router from src.api.endpoints.search.routes import search_router from src.api.endpoints.submit.routes import submit_router @@ -174,7 +173,6 @@ async def redirect_docs(): annotate_router, url_router, task_router, - review_router, search_router, metrics_router, submit_router diff --git a/src/core/core.py b/src/core/core.py index 2875f8a8..cce56dfe 100644 --- a/src/core/core.py +++ b/src/core/core.py @@ -162,15 +162,6 @@ async def get_task_info(self, task_id: int) -> TaskInfo: #region Annotations and Review - - async def get_next_source_for_review( - self, - batch_id: Optional[int] - ) -> GetNextURLForFinalReviewOuterResponse: - return await self.adb_client.get_next_url_for_final_review( - batch_id=batch_id - ) - async def get_next_url_for_all_annotations( self, user_id: int, @@ -197,28 +188,6 @@ async def submit_url_for_all_annotations( ) ) - async def approve_url( - self, - approval_info: FinalReviewApprovalInfo, - access_info: AccessInfo - ): - await self.adb_client.approve_url( - approval_info=approval_info, - user_id=access_info.user_id - ) - - async def reject_url( - self, - url_id: int, - access_info: AccessInfo, - rejection_reason: RejectionReason - ): - await self.adb_client.reject_url( - url_id=url_id, - user_id=access_info.user_id, - rejection_reason=rejection_reason - ) - async def upload_manual_batch( self, dto: ManualBatchInputDTO, diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 750303c6..4e0c1dda 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -32,8 +32,6 @@ from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo from src.api.endpoints.review.approve.query_.core import ApproveURLQueryBuilder from src.api.endpoints.review.enums import RejectionReason -from src.api.endpoints.review.next.core import GetNextURLForFinalReviewQueryBuilder -from src.api.endpoints.review.next.dto import 
GetNextURLForFinalReviewOuterResponse from src.api.endpoints.review.reject.query import RejectURLQueryBuilder from src.api.endpoints.search.dtos.response import SearchURLResponse from src.api.endpoints.task.by_id.dto import TaskInfo @@ -598,9 +596,6 @@ async def upsert_new_agencies( if agency is None: agency = Agency(agency_id=suggestion.pdap_agency_id) agency.name = suggestion.agency_name - agency.state = suggestion.state - agency.county = suggestion.county - agency.locality = suggestion.locality agency.agency_type = AgencyType.UNKNOWN session.add(agency) @@ -655,19 +650,6 @@ async def get_urls_with_confirmed_agencies(self, session: AsyncSession) -> list[ results = await session.execute(statement) return list(results.scalars().all()) - @session_manager - async def get_next_url_for_final_review( - self, - session: AsyncSession, - batch_id: Optional[int] - ) -> GetNextURLForFinalReviewOuterResponse: - - builder = GetNextURLForFinalReviewQueryBuilder( - batch_id=batch_id - ) - result = await builder.run(session) - return result - async def approve_url( self, approval_info: FinalReviewApprovalInfo, diff --git a/src/db/models/impl/agency/sqlalchemy.py b/src/db/models/impl/agency/sqlalchemy.py index c8a19a56..002b0255 100644 --- a/src/db/models/impl/agency/sqlalchemy.py +++ b/src/db/models/impl/agency/sqlalchemy.py @@ -22,9 +22,6 @@ class Agency( agency_id = Column(Integer, primary_key=True) name = Column(String, nullable=False) - state = Column(String, nullable=True) - county = Column(String, nullable=True) - locality = Column(String, nullable=True) agency_type = enum_column(AgencyType, name="agency_type_enum") jurisdiction_type = enum_column( JurisdictionType, diff --git a/tests/automated/integration/api/conftest.py b/tests/automated/integration/api/conftest.py index 4b9e2fa4..fa019469 100644 --- a/tests/automated/integration/api/conftest.py +++ b/tests/automated/integration/api/conftest.py @@ -5,14 +5,12 @@ import pytest_asyncio from starlette.testclient import 
TestClient -from src.api.endpoints.review.routes import requires_final_review_permission from src.api.main import app from src.core.core import AsyncCore -from src.security.manager import get_access_info from src.security.dtos.access_info import AccessInfo from src.security.enums import Permissions +from src.security.manager import get_access_info from tests.automated.integration.api._helpers.RequestValidator import RequestValidator -from tests.conftest import set_env_vars from tests.helpers.api_test_helper import APITestHelper MOCK_USER_ID = 1 @@ -42,7 +40,6 @@ def override_access_info() -> AccessInfo: def client(disable_task_flags) -> Generator[TestClient, None, None]: with TestClient(app) as c: app.dependency_overrides[get_access_info] = override_access_info - app.dependency_overrides[requires_final_review_permission] = override_access_info async_core: AsyncCore = c.app.state.async_core # Interfaces to the web should be mocked diff --git a/tests/automated/integration/api/review/__init__.py b/tests/automated/integration/api/review/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/api/review/conftest.py b/tests/automated/integration/api/review/conftest.py deleted file mode 100644 index 198bef59..00000000 --- a/tests/automated/integration/api/review/conftest.py +++ /dev/null @@ -1,31 +0,0 @@ -import pytest_asyncio - -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLType -from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo -from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters -from tests.helpers.batch_creation_parameters.enums import URLCreationEnum -from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters - - -@pytest_asyncio.fixture -async def 
batch_url_creation_info(db_data_creator): - - parameters = TestBatchCreationParameters( - urls=[ - TestURLCreationParameters( - count=2, - status=URLCreationEnum.OK, - annotation_info=AnnotationInfo( - user_relevant=URLType.DATA_SOURCE, - user_record_type=RecordType.ARREST_RECORDS, - user_agency=URLAgencyAnnotationPostInfo( - suggested_agency=await db_data_creator.agency() - ) - ) - ) - ] - ) - - return await db_data_creator.batch_v2(parameters=parameters) diff --git a/tests/automated/integration/api/review/rejection/__init__.py b/tests/automated/integration/api/review/rejection/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/api/review/rejection/helpers.py b/tests/automated/integration/api/review/rejection/helpers.py deleted file mode 100644 index f9619747..00000000 --- a/tests/automated/integration/api/review/rejection/helpers.py +++ /dev/null @@ -1,39 +0,0 @@ -from src.api.endpoints.review.enums import RejectionReason -from src.api.endpoints.review.next.dto import GetNextURLForFinalReviewOuterResponse -from src.api.endpoints.review.reject.dto import FinalReviewRejectionInfo -from src.collectors.enums import URLStatus -from src.db.models.impl.url.core.sqlalchemy import URL -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review - - -async def run_rejection_test( - api_test_helper, - rejection_reason: RejectionReason, - url_status: URLStatus -): - ath = api_test_helper - db_data_creator = ath.db_data_creator - - setup_info = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - annotation_count=3, - include_user_annotations=True - ) - url_mapping = setup_info.url_mapping - - result: GetNextURLForFinalReviewOuterResponse = await ath.request_validator.reject_and_get_next_source_for_review( - review_info=FinalReviewRejectionInfo( - url_id=url_mapping.url_id, - rejection_reason=rejection_reason - ) - ) - - assert result.next_source is None - - 
adb_client = db_data_creator.adb_client - # Confirm same agency id is listed as rejected - urls: list[URL] = await adb_client.get_all(URL) - assert len(urls) == 1 - url = urls[0] - assert url.id == url_mapping.url_id - assert url.status == url_status diff --git a/tests/automated/integration/api/review/rejection/test_broken_page.py b/tests/automated/integration/api/review/rejection/test_broken_page.py deleted file mode 100644 index 813e523a..00000000 --- a/tests/automated/integration/api/review/rejection/test_broken_page.py +++ /dev/null @@ -1,14 +0,0 @@ -import pytest - -from src.api.endpoints.review.enums import RejectionReason -from src.collectors.enums import URLStatus -from tests.automated.integration.api.review.rejection.helpers import run_rejection_test - - -@pytest.mark.asyncio -async def test_rejection_broken_page(api_test_helper): - await run_rejection_test( - api_test_helper, - rejection_reason=RejectionReason.BROKEN_PAGE_404, - url_status=URLStatus.NOT_FOUND - ) diff --git a/tests/automated/integration/api/review/rejection/test_individual_record.py b/tests/automated/integration/api/review/rejection/test_individual_record.py deleted file mode 100644 index fd1b8231..00000000 --- a/tests/automated/integration/api/review/rejection/test_individual_record.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from src.api.endpoints.review.enums import RejectionReason -from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from tests.automated.integration.api.review.rejection.helpers import run_rejection_test -from tests.helpers.api_test_helper import APITestHelper - - -@pytest.mark.asyncio -async def test_rejection_individual_record(api_test_helper: APITestHelper): - await run_rejection_test( - api_test_helper, - rejection_reason=RejectionReason.INDIVIDUAL_RECORD, - url_status=URLStatus.OK - ) - - # Get FlagURLValidated and confirm 
Individual Record - flag: FlagURLValidated = (await api_test_helper.adb_client().get_all(FlagURLValidated))[0] - assert flag.type == URLType.INDIVIDUAL_RECORD - diff --git a/tests/automated/integration/api/review/rejection/test_not_relevant.py b/tests/automated/integration/api/review/rejection/test_not_relevant.py deleted file mode 100644 index 2cb95704..00000000 --- a/tests/automated/integration/api/review/rejection/test_not_relevant.py +++ /dev/null @@ -1,20 +0,0 @@ -import pytest - -from src.api.endpoints.review.enums import RejectionReason -from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from tests.automated.integration.api.review.rejection.helpers import run_rejection_test - - -@pytest.mark.asyncio -async def test_rejection_not_relevant(api_test_helper): - await run_rejection_test( - api_test_helper, - rejection_reason=RejectionReason.NOT_RELEVANT, - url_status=URLStatus.OK - ) - - # Get FlagURLValidated and confirm Not Relevant - flag: FlagURLValidated = (await api_test_helper.adb_client().get_all(FlagURLValidated))[0] - assert flag.type == URLType.NOT_RELEVANT \ No newline at end of file diff --git a/tests/automated/integration/api/review/test_approve_and_get_next_source.py b/tests/automated/integration/api/review/test_approve_and_get_next_source.py deleted file mode 100644 index 858df360..00000000 --- a/tests/automated/integration/api/review/test_approve_and_get_next_source.py +++ /dev/null @@ -1,81 +0,0 @@ -import pytest - -from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo -from src.api.endpoints.review.next.dto import GetNextURLForFinalReviewOuterResponse -from src.collectors.enums import URLStatus -from src.core.enums import RecordType -from src.db.constants import PLACEHOLDER_AGENCY_NAME -from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.flag.url_validated.enums 
import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata -from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review - - -@pytest.mark.asyncio -async def test_approve_and_get_next_source_for_review(api_test_helper): - ath = api_test_helper - db_data_creator = ath.db_data_creator - - setup_info = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - include_user_annotations=True - ) - url_mapping = setup_info.url_mapping - - # Add confirmed agency - await db_data_creator.confirmed_suggestions([url_mapping.url_id]) - - - agency_ids = [await db_data_creator.agency() for _ in range(3)] - - result: GetNextURLForFinalReviewOuterResponse = await ath.request_validator.approve_and_get_next_source_for_review( - approval_info=FinalReviewApprovalInfo( - url_id=url_mapping.url_id, - record_type=RecordType.ARREST_RECORDS, - agency_ids=agency_ids, - name="New Test Name", - description="New Test Description", - record_formats=["New Test Record Format", "New Test Record Format 2"], - data_portal_type="New Test Data Portal Type", - supplying_entity="New Test Supplying Entity" - ) - ) - - assert result.remaining == 0 - assert result.next_source is None - - adb_client = db_data_creator.adb_client - # Confirm same agency id is listed as confirmed - urls: list[URL] = await adb_client.get_all(URL) - assert len(urls) == 1 - url = urls[0] - assert url.id == url_mapping.url_id - assert url.status == URLStatus.OK - assert url.name == "New Test Name" - assert url.description == "New Test Description" - - record_types: list[URLRecordType] = await adb_client.get_all(URLRecordType) - assert 
len(record_types) == 1 - assert record_types[0].record_type == RecordType.ARREST_RECORDS - - optional_metadata = await adb_client.get_all(URLOptionalDataSourceMetadata) - assert len(optional_metadata) == 1 - assert optional_metadata[0].data_portal_type == "New Test Data Portal Type" - assert optional_metadata[0].supplying_entity == "New Test Supplying Entity" - assert optional_metadata[0].record_formats == ["New Test Record Format", "New Test Record Format 2"] - - # Get agencies - confirmed_agencies = await adb_client.get_all(LinkURLAgency) - assert len(confirmed_agencies) == 3 - for agency in confirmed_agencies: - assert agency.agency_id in agency_ids - - - # Confirm presence of FlagURLValidated - flag_url_validated = await adb_client.get_all(FlagURLValidated) - assert len(flag_url_validated) == 1 - assert flag_url_validated[0].type == URLType.DATA_SOURCE \ No newline at end of file diff --git a/tests/automated/integration/api/review/test_batch_filtering.py b/tests/automated/integration/api/review/test_batch_filtering.py deleted file mode 100644 index 481f7e90..00000000 --- a/tests/automated/integration/api/review/test_batch_filtering.py +++ /dev/null @@ -1,40 +0,0 @@ -import pytest - -from src.collectors.enums import URLStatus -from src.db.dtos.url.mapping import URLMapping -from tests.helpers.data_creator.core import DBDataCreator -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo - - -@pytest.mark.asyncio -async def test_batch_filtering( - batch_url_creation_info: BatchURLCreationInfo, - api_test_helper -): - ath = api_test_helper - rv = ath.request_validator - - dbdc: DBDataCreator = ath.db_data_creator - - batch_id: int = batch_url_creation_info.batch_id - - validated_url_mappings: list[URLMapping] = await dbdc.create_validated_urls(count=4) - validated_url_ids: list[int] = [url_mapping.url_id for url_mapping in validated_url_mappings] - await dbdc.create_batch_url_links( - url_ids=validated_url_ids, - batch_id=batch_id 
- ) - - # Receive null batch info if batch id not provided - outer_result_no_batch_info = await rv.review_next_source() - assert outer_result_no_batch_info.next_source.batch_info is None - - # Get batch info if batch id is provided - outer_result = await ath.request_validator.review_next_source( - batch_id=batch_id - ) - assert outer_result.remaining == 2 - batch_info = outer_result.next_source.batch_info - assert batch_info.count_reviewed == 4 - assert batch_info.count_ready_for_review == 2 - diff --git a/tests/automated/integration/api/review/test_next_source.py b/tests/automated/integration/api/review/test_next_source.py deleted file mode 100644 index 47b9d710..00000000 --- a/tests/automated/integration/api/review/test_next_source.py +++ /dev/null @@ -1,67 +0,0 @@ -import pytest - -from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLType -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review - - -@pytest.mark.asyncio -async def test_review_next_source(api_test_helper): - ath = api_test_helper - - setup_info = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, - include_user_annotations=True - ) - url_mapping = setup_info.url_mapping - - await ath.db_data_creator.agency_auto_suggestions( - url_id=url_mapping.url_id, - count=3 - ) - confirmed_agency_id = await ath.db_data_creator.agency_confirmed_suggestion(url_id=url_mapping.url_id) - - outer_result = await ath.request_validator.review_next_source() - assert outer_result.remaining == 1 - - result = outer_result.next_source - - assert result.name == "Test Name" - assert result.description == "Test Description" - - optional_metadata = result.optional_metadata - - assert optional_metadata.data_portal_type == "Test Data Portal Type" - assert optional_metadata.supplying_entity == "Test Supplying Entity" - assert optional_metadata.record_formats == ["Test Record Format", "Test Record Format 2"] - - 
assert result.url == url_mapping.url - html_info = result.html_info - assert html_info.description == "test description" - assert html_info.title == "test html content" - - annotation_info = result.annotations - relevant_info = annotation_info.relevant - assert relevant_info.auto.is_relevant == True - assert relevant_info.user == {URLType.NOT_RELEVANT: 1} - - record_type_info = annotation_info.record_type - assert record_type_info.auto == RecordType.ARREST_RECORDS - assert record_type_info.user == {RecordType.ACCIDENT_REPORTS: 1} - - agency_info = annotation_info.agency - auto_agency_suggestions = agency_info.auto - assert auto_agency_suggestions.unknown == False - assert len(auto_agency_suggestions.suggestions) == 3 - - # Check user agency suggestions exist and in descending order of count - user_agency_suggestion = agency_info.user - assert user_agency_suggestion[0].suggestion.pdap_agency_id == setup_info.user_agency_id - assert user_agency_suggestion[0].user_count == 1 - - - # Check confirmed agencies exist - confirmed_agencies = agency_info.confirmed - assert len(confirmed_agencies) == 1 - confirmed_agency = confirmed_agencies[0] - assert confirmed_agency.pdap_agency_id == confirmed_agency_id diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/__init__.py b/tests/automated/integration/db/client/get_next_url_for_final_review/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/test_basic.py b/tests/automated/integration/db/client/get_next_url_for_final_review/test_basic.py deleted file mode 100644 index 0d461f23..00000000 --- a/tests/automated/integration/db/client/get_next_url_for_final_review/test_basic.py +++ /dev/null @@ -1,54 +0,0 @@ -import pytest - -from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLType -from tests.helpers.data_creator.core import DBDataCreator -from 
tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review - - -@pytest.mark.asyncio -async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreator): - """ - Test that an annotated URL is returned - """ - - setup_info = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - annotation_count=1, - include_user_annotations=True - ) - - url_mapping = setup_info.url_mapping - # Add agency auto suggestions - await db_data_creator.agency_auto_suggestions( - url_id=url_mapping.url_id, - count=3 - ) - - - outer_result = await db_data_creator.adb_client.get_next_url_for_final_review( - batch_id=None - ) - result = outer_result.next_source - - assert result.url == url_mapping.url - html_info = result.html_info - assert html_info.description == "test description" - assert html_info.title == "test html content" - - annotation_info = result.annotations - relevant_info = annotation_info.relevant - assert relevant_info.auto.is_relevant == True - assert relevant_info.user == {URLType.NOT_RELEVANT: 1} - - record_type_info = annotation_info.record_type - assert record_type_info.auto == RecordType.ARREST_RECORDS - assert record_type_info.user == {RecordType.ACCIDENT_REPORTS: 1} - - agency_info = annotation_info.agency - auto_agency_suggestions = agency_info.auto - assert auto_agency_suggestions.unknown == False - assert len(auto_agency_suggestions.suggestions) == 3 - - # Check user agency suggestion exists and is correct - assert agency_info.user[0].suggestion.pdap_agency_id == setup_info.user_agency_id diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/test_batch_id_filtering.py b/tests/automated/integration/db/client/get_next_url_for_final_review/test_batch_id_filtering.py deleted file mode 100644 index ad4fe3d6..00000000 --- a/tests/automated/integration/db/client/get_next_url_for_final_review/test_batch_id_filtering.py +++ /dev/null @@ -1,36 +0,0 @@ -import pytest - -from 
tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_final_review_batch_id_filtering(db_data_creator: DBDataCreator): - setup_info_1 = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - annotation_count=3, - include_user_annotations=True - ) - - setup_info_2 = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - annotation_count=3, - include_user_annotations=True - ) - - url_mapping_1 = setup_info_1.url_mapping - url_mapping_2 = setup_info_2.url_mapping - - # If a batch id is provided, return first valid URL with that batch id - result_with_batch_id = await db_data_creator.adb_client.get_next_url_for_final_review( - batch_id=setup_info_2.batch_id - ) - - assert result_with_batch_id.next_source.url == url_mapping_2.url - - # If no batch id is provided, return first valid URL - result_no_batch_id =await db_data_creator.adb_client.get_next_url_for_final_review( - batch_id=None - ) - - assert result_no_batch_id.next_source.url == url_mapping_1.url diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/test_favor_more_components.py b/tests/automated/integration/db/client/get_next_url_for_final_review/test_favor_more_components.py deleted file mode 100644 index 38e0527c..00000000 --- a/tests/automated/integration/db/client/get_next_url_for_final_review/test_favor_more_components.py +++ /dev/null @@ -1,42 +0,0 @@ -import pytest - -from src.core.enums import SuggestionType -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_final_review_favor_more_components(db_data_creator: DBDataCreator): - """ - Test in the case of two URLs, favoring the one with more annotations for more 
components - i.e., if one has annotations for record type and agency id, that should be favored over one with just record type - """ - - setup_info_without_user_anno = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - annotation_count=3, - include_user_annotations=False - ) - url_mapping_without_user_anno = setup_info_without_user_anno.url_mapping - - setup_info_with_user_anno = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - annotation_count=3, - include_user_annotations=True - ) - url_mapping_with_user_anno = setup_info_with_user_anno.url_mapping - - # Have both be listed as unknown - - for url_mapping in [url_mapping_with_user_anno, url_mapping_without_user_anno]: - await db_data_creator.agency_auto_suggestions( - url_id=url_mapping.url_id, - count=3, - suggestion_type=SuggestionType.UNKNOWN - ) - - result = await db_data_creator.adb_client.get_next_url_for_final_review( - batch_id=None - ) - - assert result.next_source.id == url_mapping_with_user_anno.url_id diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/test_not_annotations.py b/tests/automated/integration/db/client/get_next_url_for_final_review/test_not_annotations.py deleted file mode 100644 index b278352c..00000000 --- a/tests/automated/integration/db/client/get_next_url_for_final_review/test_not_annotations.py +++ /dev/null @@ -1,19 +0,0 @@ -import pytest - -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_final_review_no_annotations(db_data_creator: DBDataCreator): - """ - Test in the case of one URL with no annotations. 
- No annotations should be returned - """ - batch_id = db_data_creator.batch() - url_mapping = db_data_creator.urls(batch_id=batch_id, url_count=1).url_mappings[0] - - result = await db_data_creator.adb_client.get_next_url_for_final_review( - batch_id=None - ) - - assert result.next_source is None diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/test_only_confirmed_urls.py b/tests/automated/integration/db/client/get_next_url_for_final_review/test_only_confirmed_urls.py deleted file mode 100644 index 72706aaf..00000000 --- a/tests/automated/integration/db/client/get_next_url_for_final_review/test_only_confirmed_urls.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest - -from src.collectors.enums import URLStatus -from tests.helpers.batch_creation_parameters.enums import URLCreationEnum -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_final_review_only_confirmed_urls(db_data_creator: DBDataCreator): - """ - Test in the case of one URL that is submitted - Should not be returned. 
- """ - batch_id = db_data_creator.batch() - url_mapping = db_data_creator.urls( - batch_id=batch_id, - url_count=1, - outcome=URLCreationEnum.SUBMITTED - ).url_mappings[0] - - result = await db_data_creator.adb_client.get_next_url_for_final_review( - batch_id=None - ) - - assert result.next_source is None diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index 6cb3a271..cbeb207f 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -517,9 +517,6 @@ async def create_agency(self, agency_id: int = 1) -> None: agency = Agency( agency_id=agency_id, name=generate_test_name(agency_id), - state=None, - county=None, - locality=None, agency_type=AgencyType.UNKNOWN ) await self.adb_client.add_all([agency]) @@ -532,9 +529,6 @@ async def create_agencies(self, count: int = 3) -> list[int]: agency = Agency( agency_id=agency_id, name=generate_test_name(agency_id), - state=None, - county=None, - locality=None, agency_type=AgencyType.UNKNOWN ) agencies.append(agency)