From cebf08537c39ad67fb54a722db690c96a6f1a9b8 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Mon, 13 Oct 2025 18:58:41 -0400 Subject: [PATCH 01/24] Begin draft --- pyproject.toml | 2 +- src/external/pdap/client.py | 9 ++++++++- src/external/pdap/impl/follows/__init__.py | 0 src/external/pdap/impl/follows/core.py | 20 +++++++++++++++++++ src/external/pdap/impl/follows/response.py | 11 ++++++++++ src/security/manager.py | 5 ----- .../manual/external/pdap/test_get_follows.py | 7 +++++++ .../manual/external/pdap/test_match_agency.py | 1 + uv.lock | 8 ++++---- 9 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 src/external/pdap/impl/follows/__init__.py create mode 100644 src/external/pdap/impl/follows/core.py create mode 100644 src/external/pdap/impl/follows/response.py create mode 100644 tests/manual/external/pdap/test_get_follows.py diff --git a/pyproject.toml b/pyproject.toml index 70f54673..0094d983 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "lxml~=5.1.0", "marshmallow~=3.23.2", "openai~=1.60.1", - "pdap-access-manager==0.3.6", + "pdap-access-manager==0.4.2", "pillow>=11.3.0", "pip>=25.2", "playwright~=1.49.1", diff --git a/src/external/pdap/client.py b/src/external/pdap/client.py index 1c950ad3..63a19fdd 100644 --- a/src/external/pdap/client.py +++ b/src/external/pdap/client.py @@ -7,6 +7,8 @@ from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse from src.external.pdap.dtos.unique_url_duplicate import UniqueURLDuplicateInfo from src.external.pdap.enums import MatchAgencyResponseStatus +from src.external.pdap.impl.follows.core import get_user_followed_locations +from src.external.pdap.impl.follows.response import GetFollowsResponse from src.external.pdap.impl.meta_urls.core import submit_meta_urls from src.external.pdap.impl.meta_urls.request import SubmitMetaURLsRequest @@ -156,4 +158,9 @@ async def submit_meta_urls( return await submit_meta_urls( self.access_manager, 
requests=requests - ) \ No newline at end of file + ) + + async def get_user_followed_locations(self) -> GetFollowsResponse: + return await get_user_followed_locations( + access_manager=self.access_manager + ) diff --git a/src/external/pdap/impl/follows/__init__.py b/src/external/pdap/impl/follows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/follows/core.py b/src/external/pdap/impl/follows/core.py new file mode 100644 index 00000000..97281395 --- /dev/null +++ b/src/external/pdap/impl/follows/core.py @@ -0,0 +1,20 @@ +from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType, ResponseInfo + +from src.external.pdap.impl.follows.response import GetFollowsResponse, LinkUserFollow + + +async def get_user_followed_locations( + access_manager: AccessManager, +) -> GetFollowsResponse: + + url: str = f"{access_manager.data_sources_url}/v3/v2/source-collector/follows" + headers: dict[str, str] = await access_manager.jwt_header() + request_info = RequestInfo( + type_=RequestType.GET, + url=url, + headers=headers + ) + response_info: ResponseInfo = await access_manager.make_request(request_info) + return GetFollowsResponse( + **response_info.data + ) \ No newline at end of file diff --git a/src/external/pdap/impl/follows/response.py b/src/external/pdap/impl/follows/response.py new file mode 100644 index 00000000..a37894f5 --- /dev/null +++ b/src/external/pdap/impl/follows/response.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + + +class LinkUserFollow(BaseModel): + user_id: int + location_id: int + + +class GetFollowsResponse(BaseModel): + follows: list[LinkUserFollow] + diff --git a/src/security/manager.py b/src/security/manager.py index 16f0519e..2a84f046 100644 --- a/src/security/manager.py +++ b/src/security/manager.py @@ -68,8 +68,3 @@ def get_access_info( token: Annotated[str, Depends(oauth2_scheme)] ) -> AccessInfo: return SecurityManager().check_access(token, 
Permissions.SOURCE_COLLECTOR) - -def require_permission(permission: Permissions): - def dependency(token: Annotated[str, Depends(oauth2_scheme)]) -> AccessInfo: - return SecurityManager().check_access(token, permission=permission) - return dependency \ No newline at end of file diff --git a/tests/manual/external/pdap/test_get_follows.py b/tests/manual/external/pdap/test_get_follows.py new file mode 100644 index 00000000..08247277 --- /dev/null +++ b/tests/manual/external/pdap/test_get_follows.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.mark.asyncio +async def test_get_user_followed_locations(pdap_client_dev): + response = await pdap_client_dev.get_user_followed_locations() + print(response) \ No newline at end of file diff --git a/tests/manual/external/pdap/test_match_agency.py b/tests/manual/external/pdap/test_match_agency.py index a637dad0..8bc8d924 100644 --- a/tests/manual/external/pdap/test_match_agency.py +++ b/tests/manual/external/pdap/test_match_agency.py @@ -4,3 +4,4 @@ @pytest.mark.asyncio async def test_match_agency(pdap_client): response = await pdap_client.match_agency(name="police") + print(response) diff --git a/uv.lock b/uv.lock index e7f52cfd..0c85cca9 100644 --- a/uv.lock +++ b/uv.lock @@ -560,7 +560,7 @@ requires-dist = [ { name = "lxml", specifier = "~=5.1.0" }, { name = "marshmallow", specifier = "~=3.23.2" }, { name = "openai", specifier = "~=1.60.1" }, - { name = "pdap-access-manager", specifier = "==0.3.6" }, + { name = "pdap-access-manager", specifier = "==0.4.2" }, { name = "pillow", specifier = ">=11.3.0" }, { name = "pip", specifier = ">=25.2" }, { name = "playwright", specifier = "~=1.49.1" }, @@ -1591,7 +1591,7 @@ wheels = [ [[package]] name = "pdap-access-manager" -version = "0.3.6" +version = "0.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1599,9 +1599,9 @@ dependencies = [ { name = "pydantic" }, { name = "requests" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/a4/14/d910483f08a0203a20fc2839738d9e27c83a66849fed422c3d4e804e15f5/pdap_access_manager-0.3.6.tar.gz", hash = "sha256:15c04f704e22116cd56b459e8a9d7f8514c75c36ca2c8a889b9ce2a308d88f6c", size = 4169, upload_time = "2025-06-12T20:14:55.942Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/48/c5567f65038c4c914cfc85a2772f66e5fae9c97076de4b12905266236650/pdap_access_manager-0.4.2.tar.gz", hash = "sha256:37a958689daf6a285c4ac989108b8e3c7c7f226032f6e684d1878648e86bd9c2", size = 5674, upload_time = "2025-10-13T21:47:53.861Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/81/76803339fd732cd3eda7458d48e67487d9377197f9ea7d4583df098823b2/pdap_access_manager-0.3.6-py3-none-any.whl", hash = "sha256:a5910068f642f7548d037bcb98657ca1945997fae4e89dc4e1d47283da485b91", size = 5034, upload_time = "2025-06-12T20:14:48.452Z" }, + { url = "https://files.pythonhosted.org/packages/cf/b0/ebca0ee05b13a47ce094c3a6a4fc5e0b148baa6ee89519722a46bcc27db9/pdap_access_manager-0.4.2-py3-none-any.whl", hash = "sha256:97d74575a4949b1aa6cd3265e4dd74a0014e451b95d0619c327e43b123bacf8b", size = 10742, upload_time = "2025-10-13T21:47:53.024Z" }, ] [[package]] From f9aa64b9410d6d93dca3caaafb15a231cd0d585a Mon Sep 17 00:00:00 2001 From: Max Chis Date: Sat, 6 Dec 2025 09:07:59 -0500 Subject: [PATCH 02/24] Add logic for validating anonymous name suggestions --- ...94049f_create_anonymous_annotation_name.py | 47 +++++++++++++ .../annotate/anonymous/post/query.py | 25 +++++++ .../validate/queries/ctes/counts/impl/name.py | 68 ++++++++++++++++--- .../anonymous_sessions__name_suggestion.py | 24 +++++++ .../api/annotate/anonymous/test_core.py | 13 +++- .../tasks/url/impl/validate/helper.py | 4 +- .../url/impl/validate/test_data_source.py | 10 ++- 7 files changed, 176 insertions(+), 15 deletions(-) create mode 100644 alembic/versions/2025_12_05_1721-dfb64594049f_create_anonymous_annotation_name.py create mode 100644 
src/db/models/impl/link/anonymous_sessions__name_suggestion.py diff --git a/alembic/versions/2025_12_05_1721-dfb64594049f_create_anonymous_annotation_name.py b/alembic/versions/2025_12_05_1721-dfb64594049f_create_anonymous_annotation_name.py new file mode 100644 index 00000000..848b9e98 --- /dev/null +++ b/alembic/versions/2025_12_05_1721-dfb64594049f_create_anonymous_annotation_name.py @@ -0,0 +1,47 @@ +"""Create anonymous_annotation_name + +Revision ID: dfb64594049f +Revises: 1d3398f9cd8a +Create Date: 2025-12-05 17:21:35.134935 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +from src.util.alembic_helpers import created_at_column + +# revision identifiers, used by Alembic. +revision: str = 'dfb64594049f' +down_revision: Union[str, None] = '1d3398f9cd8a' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "link__anonymous_sessions__name_suggestions", + sa.Column( + "session_id", + UUID, + sa.ForeignKey("anonymous_sessions.id"), + nullable=False + ), + sa.Column( + "suggestion_id", + sa.Integer(), + sa.ForeignKey("url_name_suggestions.id"), + nullable=False, + ), + created_at_column(), + sa.PrimaryKeyConstraint( + "session_id", + "suggestion_id" + ) + ) + + +def downgrade() -> None: + pass diff --git a/src/api/endpoints/annotate/anonymous/post/query.py b/src/api/endpoints/annotate/anonymous/post/query.py index 593d79d9..29670c80 100644 --- a/src/api/endpoints/annotate/anonymous/post/query.py +++ b/src/api/endpoints/annotate/anonymous/post/query.py @@ -3,10 +3,13 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo +from src.db.models.impl.link.anonymous_sessions__name_suggestion import LinkAnonymousSessionNameSuggestion from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy 
import AnonymousAnnotationAgency from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType +from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource +from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.queries.base.builder import QueryBuilderBase @@ -31,6 +34,28 @@ async def run(self, session: AsyncSession) -> None: ) session.add(url_type_suggestion) + name_id: int | None + if self.post_info.name_info.new_name is not None: + name_suggestion = URLNameSuggestion( + url_id=self.url_id, + suggestion=self.post_info.name_info.new_name, + source=NameSuggestionSource.USER + ) + session.add(name_suggestion) + await session.flush() + name_id = name_suggestion.id + elif self.post_info.name_info.existing_name_id is not None: + name_id = self.post_info.name_info.existing_name_id + else: + name_id = None + + if name_id is not None: + name_suggestion = LinkAnonymousSessionNameSuggestion( + suggestion_id=name_id, + session_id=self.session_id + ) + session.add(name_suggestion) + if self.post_info.record_type is not None: record_type_suggestion = AnonymousAnnotationRecordType( url_id=self.url_id, diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py index 5cb014f1..4000e6e2 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py @@ -1,28 +1,76 @@ from sqlalchemy import select, func from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer +from src.db.models.impl.link.anonymous_sessions__name_suggestion import 
LinkAnonymousSessionNameSuggestion from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.models.views.unvalidated_url import UnvalidatedURL +_user_counts = ( + select( + URLNameSuggestion.url_id, + URLNameSuggestion.suggestion.label("entity"), + func.count().label("votes") + ) + .join( + LinkUserNameSuggestion, + LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id + ) + .group_by( + URLNameSuggestion.url_id, + URLNameSuggestion.suggestion + ) + .cte("user_counts") +) + +_anon_counts = ( + select( + URLNameSuggestion.url_id, + URLNameSuggestion.suggestion.label("entity"), + func.count().label("votes") + ) + .join( + LinkAnonymousSessionNameSuggestion, + LinkAnonymousSessionNameSuggestion.suggestion_id == URLNameSuggestion.id + ) + .group_by( + URLNameSuggestion.url_id, + URLNameSuggestion.suggestion + ) + .cte("anon_counts") +) + +_union_counts = ( + select( + _user_counts.c.url_id, + _user_counts.c.entity, + _user_counts.c.votes + ) + .union_all( + select( + _anon_counts.c.url_id, + _anon_counts.c.entity, + _anon_counts.c.votes + ) + ) + .cte("counts_name_union") +) + + NAME_VALIDATION_COUNTS_CTE = ValidatedCountsCTEContainer( ( select( - URLNameSuggestion.url_id, - URLNameSuggestion.suggestion.label("entity"), - func.count().label("votes") + _union_counts.c.url_id, + _union_counts.c.entity, + func.sum(_union_counts.c.votes).label("votes") ) .join( UnvalidatedURL, - URLNameSuggestion.url_id == UnvalidatedURL.url_id - ) - .join( - LinkUserNameSuggestion, - LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id + _union_counts.c.url_id == UnvalidatedURL.url_id ) .group_by( - URLNameSuggestion.url_id, - URLNameSuggestion.suggestion + _union_counts.c.url_id, + _union_counts.c.entity, ) ).cte("counts_name") ) \ No newline at end of file diff --git a/src/db/models/impl/link/anonymous_sessions__name_suggestion.py 
b/src/db/models/impl/link/anonymous_sessions__name_suggestion.py new file mode 100644 index 00000000..a5773bd7 --- /dev/null +++ b/src/db/models/impl/link/anonymous_sessions__name_suggestion.py @@ -0,0 +1,24 @@ +from sqlalchemy import PrimaryKeyConstraint, ForeignKey, Integer, Column + +from src.db.models.mixins import CreatedAtMixin, AnonymousSessionMixin +from src.db.models.templates_.base import Base + + +class LinkAnonymousSessionNameSuggestion( + Base, + AnonymousSessionMixin, + CreatedAtMixin +): + __tablename__ = "link__anonymous_sessions__name_suggestions" + suggestion_id = Column( + Integer, + ForeignKey("url_name_suggestions.id"), + primary_key=True, + nullable=False, + ) + __table_args__ = ( + PrimaryKeyConstraint( + "session_id", + "suggestion_id" + ), + ) \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/anonymous/test_core.py b/tests/automated/integration/api/annotate/anonymous/test_core.py index b6fb93fa..9e3b9af9 100644 --- a/tests/automated/integration/api/annotate/anonymous/test_core.py +++ b/tests/automated/integration/api/annotate/anonymous/test_core.py @@ -3,7 +3,6 @@ import pytest from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion -from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo from src.api.endpoints.annotate.all.post.models.location import AnnotationPostLocationInfo from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo @@ -12,10 +11,12 @@ from src.core.enums import RecordType from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.link.anonymous_sessions__name_suggestion import LinkAnonymousSessionNameSuggestion from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency from 
src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType +from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.models.mixins import URLDependentMixin from tests.automated.integration.api.annotate.anonymous.helper import get_next_url_for_anonymous_annotation, \ post_and_get_next_url_for_anonymous_annotation @@ -90,6 +91,16 @@ async def test_annotate_anonymous( instance: model = instances[0] assert instance.url_id == get_response_1.next_annotation.url_info.url_id + # Check for existence of name suggestion (2 were added by setup) + name_suggestions: list[URLNameSuggestion] = await ddc.adb_client.get_all(URLNameSuggestion) + assert len(name_suggestions) == 3 + + # Check for existence of link + link_instances: list[LinkAnonymousSessionNameSuggestion] = await ddc.adb_client.get_all(LinkAnonymousSessionNameSuggestion) + assert len(link_instances) == 1 + link_instance: LinkAnonymousSessionNameSuggestion = link_instances[0] + assert link_instance.session_id == session_id + # Run again without giving session ID, confirm original URL returned get_response_2: GetNextURLForAnonymousAnnotationResponse = await get_next_url_for_anonymous_annotation(rv) assert get_response_2.session_id != session_id diff --git a/tests/automated/integration/tasks/url/impl/validate/helper.py b/tests/automated/integration/tasks/url/impl/validate/helper.py index 879fbc66..091fe5fa 100644 --- a/tests/automated/integration/tasks/url/impl/validate/helper.py +++ b/tests/automated/integration/tasks/url/impl/validate/helper.py @@ -132,7 +132,7 @@ async def add_record_type_suggestions( async def add_name_suggestion( self, count: int = 1, - ) -> str: + ) -> int: name = f"Test Validate Task Name" suggestion_id: int = await 
self.db_data_creator.name_suggestion( url_id=self.url_id, @@ -144,7 +144,7 @@ async def add_name_suggestion( suggestion_id=suggestion_id, user_id=next_int(), ) - return name + return suggestion_id async def check_name(self) -> None: urls: list[URL] = await self.adb_client.get_all(URL) diff --git a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py index 4fe0d444..434e8f06 100644 --- a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py +++ b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py @@ -13,6 +13,7 @@ from src.core.enums import RecordType from src.core.tasks.url.operators.validate.core import AutoValidateURLTaskOperator from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.link.anonymous_sessions__name_suggestion import LinkAnonymousSessionNameSuggestion from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType @@ -45,7 +46,7 @@ async def test_data_source( assert not await operator.meets_task_prerequisites() - await helper.add_name_suggestion(count=2) + suggestion_id: int = await helper.add_name_suggestion(count=1) assert not await operator.meets_task_prerequisites() @@ -74,11 +75,16 @@ async def test_data_source( session_id=session_id, url_id=helper.url_id ) + anon_name_link = LinkAnonymousSessionNameSuggestion( + suggestion_id=suggestion_id, + session_id=session_id + ) for model in [ anon_url_type, anon_record_type, anon_location, - anon_agency + anon_agency, + anon_name_link ]: await helper.adb_client.add(model) From 1015bdf5e6c5b802d5e4725a61dfca94f73ab8cb Mon Sep 17 00:00:00 2001 From: Max Chis Date: Wed, 17 Dec 2025 14:53:27 -0500 
Subject: [PATCH 03/24] Remove special permission access for annotate endpoint --- src/api/endpoints/annotate/routes.py | 6 +++--- src/security/manager.py | 5 +++++ tests/automated/integration/conftest.py | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/api/endpoints/annotate/routes.py b/src/api/endpoints/annotate/routes.py index 1633eb5a..ee3cc3c7 100644 --- a/src/api/endpoints/annotate/routes.py +++ b/src/api/endpoints/annotate/routes.py @@ -15,7 +15,7 @@ from src.core.core import AsyncCore from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_access_info, get_standard_user_access_info annotate_router = APIRouter( prefix="/annotate", @@ -76,7 +76,7 @@ async def annotate_url_for_all_annotations_and_get_next_url_anonymous( @annotate_router.get("/all") async def get_next_url_for_all_annotations( - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_standard_user_access_info), async_core: AsyncCore = Depends(get_async_core), batch_id: int | None = batch_query, anno_url_id: int | None = url_id_query @@ -92,7 +92,7 @@ async def annotate_url_for_all_annotations_and_get_next_url( url_id: int, all_annotation_post_info: AllAnnotationPostInfo, async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_standard_user_access_info), batch_id: int | None = batch_query, anno_url_id: int | None = url_id_query ) -> GetNextURLForAllAnnotationResponse: diff --git a/src/security/manager.py b/src/security/manager.py index 16f0519e..abeade07 100644 --- a/src/security/manager.py +++ b/src/security/manager.py @@ -69,6 +69,11 @@ def get_access_info( ) -> AccessInfo: return SecurityManager().check_access(token, Permissions.SOURCE_COLLECTOR) +def 
get_standard_user_access_info( + token: Annotated[str, Depends(oauth2_scheme)] +) -> AccessInfo: + return SecurityManager().validate_token(token) + def require_permission(permission: Permissions): def dependency(token: Annotated[str, Depends(oauth2_scheme)]) -> AccessInfo: return SecurityManager().check_access(token, permission=permission) diff --git a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index 19a9fe19..22537d20 100644 --- a/tests/automated/integration/conftest.py +++ b/tests/automated/integration/conftest.py @@ -19,7 +19,7 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.security.dtos.access_info import AccessInfo from src.security.enums import Permissions -from src.security.manager import get_access_info +from src.security.manager import get_access_info, get_standard_user_access_info from tests.automated.integration.api._helpers.RequestValidator import RequestValidator from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.core import DBDataCreator @@ -135,6 +135,7 @@ def override_access_info() -> AccessInfo: def client(disable_task_flags) -> Generator[TestClient, None, None]: with TestClient(app) as c: app.dependency_overrides[get_access_info] = override_access_info + app.dependency_overrides[get_standard_user_access_info] = override_access_info async_core: AsyncCore = c.app.state.async_core # Interfaces to the web should be mocked From a053d16ae04b6e1f5efee418d6c88dec7417f252 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Wed, 17 Dec 2025 20:04:25 -0500 Subject: [PATCH 04/24] Remove special permission requirements --- src/api/endpoints/contributions/routes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/api/endpoints/contributions/routes.py b/src/api/endpoints/contributions/routes.py index c6fdc739..457ece29 100644 --- a/src/api/endpoints/contributions/routes.py +++ b/src/api/endpoints/contributions/routes.py @@ -7,7 +7,7 @@ from 
src.api.endpoints.contributions.user.response import ContributionsUserResponse from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_access_info, get_standard_user_access_info contributions_router = APIRouter( prefix="/contributions", @@ -17,7 +17,7 @@ @contributions_router.get("/leaderboard") async def get_leaderboard( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_standard_user_access_info) ) -> ContributionsLeaderboardResponse: """Returns the leaderboard of user contributions.""" return await core.adb_client.run_query_builder( @@ -27,7 +27,7 @@ async def get_leaderboard( @contributions_router.get("/user") async def get_user_contributions( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_standard_user_access_info) ) -> ContributionsUserResponse: """Get contributions for the user and how often their annotations agreed with the final validation of URLs. 
From 749c2b5089a0231a43b2622cd97701048e560da7 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 18 Dec 2025 09:38:22 -0500 Subject: [PATCH 05/24] Privilege manually-submitted URLs for annotation sorting --- .../annotate/_shared/queries/helper.py | 57 +++++++++++++++++++ .../annotate/all/get/queries/core.py | 41 +++---------- .../endpoints/annotate/anonymous/get/query.py | 30 ++-------- src/api/endpoints/contributions/routes.py | 2 +- .../api/annotate/all/test_sorting.py | 48 ++++++++++++++++ .../data_creator/commands/impl/urls_/query.py | 6 +- tests/helpers/data_creator/core.py | 2 + tests/helpers/setup/final_review/core.py | 7 ++- 8 files changed, 131 insertions(+), 62 deletions(-) create mode 100644 src/api/endpoints/annotate/_shared/queries/helper.py create mode 100644 tests/automated/integration/api/annotate/all/test_sorting.py diff --git a/src/api/endpoints/annotate/_shared/queries/helper.py b/src/api/endpoints/annotate/_shared/queries/helper.py new file mode 100644 index 00000000..f5bf55eb --- /dev/null +++ b/src/api/endpoints/annotate/_shared/queries/helper.py @@ -0,0 +1,57 @@ +""" +This module contains helper functions for the annotate GET queries +""" + +from sqlalchemy import Select, case +from sqlalchemy.orm import joinedload + +from src.db.models.impl.url.core.enums import URLSource +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.views.unvalidated_url import UnvalidatedURL +from src.db.models.views.url_anno_count import URLAnnotationCount +from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView + + +def get_select() -> Select: + return ( + Select(URL) + # URL Must be unvalidated + .join( + UnvalidatedURL, + UnvalidatedURL.url_id == URL.id + ) + .join( + URLAnnotationFlagsView, + URLAnnotationFlagsView.url_id == URL.id + ) + .join( + URLAnnotationCount, + URLAnnotationCount.url_id == URL.id + ) + ) + +def conclude(query: Select) -> Select: + query = ( + # Add load options + query.options( + 
joinedload(URL.html_content), + joinedload(URL.user_relevant_suggestions), + joinedload(URL.user_record_type_suggestions), + joinedload(URL.name_suggestions), + ) + # Sorting Priority + .order_by( + # Privilege manually submitted URLs first + case( + (URL.source == URLSource.MANUAL, 0), + else_=1 + ).asc(), + # Break ties by favoring URL with higher total annotations + URLAnnotationCount.total_anno_count.desc(), + # Break additional ties by favoring least recently created URLs + URL.id.asc() + ) + # Limit to 1 result + .limit(1) + ) + return query \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index 89975a08..c63e8489 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -1,8 +1,8 @@ -from sqlalchemy import Select, exists, select +from sqlalchemy import exists, select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import joinedload from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result +from src.api.endpoints.annotate._shared.queries import helper from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended @@ -12,9 +12,6 @@ from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion -from src.db.models.views.unvalidated_url import UnvalidatedURL -from src.db.models.views.url_anno_count import URLAnnotationCount -from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import QueryBuilderBase @@ -35,22 +32,9 @@ async def run( self, 
session: AsyncSession ) -> GetNextURLForAllAnnotationResponse: - query = ( - Select(URL) - # URL Must be unvalidated - .join( - UnvalidatedURL, - UnvalidatedURL.url_id == URL.id - ) - .join( - URLAnnotationFlagsView, - URLAnnotationFlagsView.url_id == URL.id - ) - .join( - URLAnnotationCount, - URLAnnotationCount.url_id == URL.id - ) - ) + query = helper.get_select() + + # Add user annotation-specific joins and conditions if self.batch_id is not None: query = query.join(LinkBatchURL).where(LinkBatchURL.batch_id == self.batch_id) if self.url_id is not None: @@ -102,18 +86,11 @@ async def run( ) ) ) - # Add load options - query = query.options( - joinedload(URL.html_content), - joinedload(URL.user_relevant_suggestions), - joinedload(URL.user_record_type_suggestions), - joinedload(URL.name_suggestions), - ) - query = query.order_by( - URLAnnotationCount.total_anno_count.desc(), - URL.id.asc() - ).limit(1) + + # Conclude query with limit and sorting + query = helper.conclude(query) + raw_results = (await session.execute(query)).unique() url: URL | None = raw_results.scalars().one_or_none() if url is None: diff --git a/src/api/endpoints/annotate/anonymous/get/query.py b/src/api/endpoints/annotate/anonymous/get/query.py index 041d5cda..ffe0a7b0 100644 --- a/src/api/endpoints/annotate/anonymous/get/query.py +++ b/src/api/endpoints/annotate/anonymous/get/query.py @@ -21,6 +21,7 @@ from src.db.models.views.url_anno_count import URLAnnotationCount from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import QueryBuilderBase +from src.api.endpoints.annotate._shared.queries import helper class GetNextURLForAnonymousAnnotationQueryBuilder(QueryBuilderBase): @@ -33,22 +34,11 @@ def __init__( self.session_id = session_id async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationResponse: + query = helper.get_select() + # Add anonymous annotation-specific conditions. 
query = ( - Select(URL) - # URL Must be unvalidated - .join( - UnvalidatedURL, - UnvalidatedURL.url_id == URL.id - ) - .join( - URLAnnotationFlagsView, - URLAnnotationFlagsView.url_id == URL.id - ) - .join( - URLAnnotationCount, - URLAnnotationCount.url_id == URL.id - ) + query .where( URL.status == URLStatus.OK.value, # Must not have been previously annotated by user @@ -77,18 +67,8 @@ async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationRe ) ) ) - .options( - joinedload(URL.html_content), - joinedload(URL.user_relevant_suggestions), - joinedload(URL.user_record_type_suggestions), - joinedload(URL.name_suggestions), - ) - .order_by( - URLAnnotationCount.total_anno_count.desc(), - URL.id.asc() - ) - .limit(1) ) + query = helper.conclude(query) raw_results = (await session.execute(query)).unique() url: URL | None = raw_results.scalars().one_or_none() diff --git a/src/api/endpoints/contributions/routes.py b/src/api/endpoints/contributions/routes.py index 457ece29..a5032708 100644 --- a/src/api/endpoints/contributions/routes.py +++ b/src/api/endpoints/contributions/routes.py @@ -7,7 +7,7 @@ from src.api.endpoints.contributions.user.response import ContributionsUserResponse from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info, get_standard_user_access_info +from src.security.manager import get_standard_user_access_info contributions_router = APIRouter( prefix="/contributions", diff --git a/tests/automated/integration/api/annotate/all/test_sorting.py b/tests/automated/integration/api/annotate/all/test_sorting.py new file mode 100644 index 00000000..a1c59813 --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_sorting.py @@ -0,0 +1,48 @@ +import pytest + +from src.db.models.impl.url.core.enums import URLSource +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.setup.final_review.core import 
setup_for_get_next_url_for_final_review +from tests.helpers.setup.final_review.model import FinalReviewSetupInfo + + +@pytest.mark.asyncio +async def test_annotate_sorting( + api_test_helper: APITestHelper, + +): + """ + Test that annotations are prioritized in the following order: + - Any manual submissions are prioritized first + - Then prioritize by number of annotations descending + - Then prioritize by URL ID ascending (e.g. least recently created) + """ + ath = api_test_helper + + # First URL created should be prioritized in absence of any other factors + setup_info_first_annotation: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=False + ) + get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_1.next_annotation is not None + assert get_response_1.next_annotation.url_info.url_id == setup_info_first_annotation.url_mapping.url_id + + # ...But higher annotation count should take precedence over least recently created + setup_info_high_annotations: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=True + ) + get_response_2 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_2.next_annotation is not None + assert get_response_2.next_annotation.url_info.url_id == setup_info_high_annotations.url_mapping.url_id + + # ...But manual submissions should take precedence over higher annotation count + setup_info_manual_submission: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + source=URLSource.MANUAL, + include_user_annotations=True + ) + get_response_3 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_3.next_annotation is not None + assert get_response_3.next_annotation.url_info.url_id == 
setup_info_manual_submission.url_mapping.url_id diff --git a/tests/helpers/data_creator/commands/impl/urls_/query.py b/tests/helpers/data_creator/commands/impl/urls_/query.py index 1123af8e..c4fddad4 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/query.py +++ b/tests/helpers/data_creator/commands/impl/urls_/query.py @@ -19,7 +19,8 @@ def __init__( url_count: int, collector_metadata: dict | None = None, status: URLCreationEnum = URLCreationEnum.OK, - created_at: datetime | None = None + created_at: datetime | None = None, + source: URLSource = URLSource.COLLECTOR ): super().__init__() self.batch_id = batch_id @@ -27,6 +28,7 @@ def __init__( self.collector_metadata = collector_metadata self.status = status self.created_at = created_at + self.source = source async def run(self) -> InsertURLsInfo: raise NotImplementedError @@ -45,7 +47,7 @@ def run_sync(self) -> InsertURLsInfo: ) else None, collector_metadata=self.collector_metadata, created_at=self.created_at, - source=URLSource.COLLECTOR + source=self.source ) ) diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index dd08a178..93ece6e1 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -266,6 +266,7 @@ def urls( url_count: int, collector_metadata: dict | None = None, outcome: URLCreationEnum = URLCreationEnum.OK, + source: URLSource = URLSource.COLLECTOR, created_at: datetime | None = None ) -> InsertURLsInfo: command = URLsDBDataCreatorCommand( @@ -273,6 +274,7 @@ def urls( url_count=url_count, collector_metadata=collector_metadata, status=outcome, + source=source, created_at=created_at ) return self.run_command_sync(command) diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py index a3a3d42c..9acd733c 100644 --- a/tests/helpers/setup/final_review/core.py +++ b/tests/helpers/setup/final_review/core.py @@ -3,6 +3,7 @@ from src.api.endpoints.annotate.agency.post.dto import 
URLAgencyAnnotationPostInfo from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.url.core.enums import URLSource from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.setup.final_review.model import FinalReviewSetupInfo @@ -11,7 +12,8 @@ async def setup_for_get_next_url_for_final_review( db_data_creator: DBDataCreator, annotation_count: int | None = None, include_user_annotations: bool = True, - include_miscellaneous_metadata: bool = True + include_miscellaneous_metadata: bool = True, + source: URLSource = URLSource.COLLECTOR ) -> FinalReviewSetupInfo: """ Sets up the database to test the final_review functions @@ -22,7 +24,8 @@ async def setup_for_get_next_url_for_final_review( batch_id = db_data_creator.batch() url_mapping = db_data_creator.urls( batch_id=batch_id, - url_count=1 + url_count=1, + source=source ).url_mappings[0] if include_miscellaneous_metadata: await db_data_creator.url_miscellaneous_metadata(url_id=url_mapping.url_id) From d53e1c00a74c942d916b8d813a4c75852b9c74d2 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 18 Dec 2025 09:42:53 -0500 Subject: [PATCH 06/24] Remove unused test setup parameter --- tests/automated/integration/db/client/approve_url/test_basic.py | 1 - tests/automated/integration/db/client/approve_url/test_error.py | 1 - tests/helpers/setup/final_review/core.py | 1 - 3 files changed, 3 deletions(-) diff --git a/tests/automated/integration/db/client/approve_url/test_basic.py b/tests/automated/integration/db/client/approve_url/test_basic.py index f090a4ea..9421c1b7 100644 --- a/tests/automated/integration/db/client/approve_url/test_basic.py +++ b/tests/automated/integration/db/client/approve_url/test_basic.py @@ -17,7 +17,6 @@ async def test_approve_url_basic(db_data_creator: DBDataCreator): setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=db_data_creator, - annotation_count=3, include_user_annotations=True ) 
url_mapping = setup_info.url_mapping diff --git a/tests/automated/integration/db/client/approve_url/test_error.py b/tests/automated/integration/db/client/approve_url/test_error.py index 352e737a..f358a74b 100644 --- a/tests/automated/integration/db/client/approve_url/test_error.py +++ b/tests/automated/integration/db/client/approve_url/test_error.py @@ -11,7 +11,6 @@ async def test_approval_url_error(db_data_creator: DBDataCreator): setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=db_data_creator, - annotation_count=3, include_user_annotations=True, include_miscellaneous_metadata=False ) diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py index 9acd733c..c474fe2c 100644 --- a/tests/helpers/setup/final_review/core.py +++ b/tests/helpers/setup/final_review/core.py @@ -10,7 +10,6 @@ async def setup_for_get_next_url_for_final_review( db_data_creator: DBDataCreator, - annotation_count: int | None = None, include_user_annotations: bool = True, include_miscellaneous_metadata: bool = True, source: URLSource = URLSource.COLLECTOR From 7240844ee8af2a3bbb979da7acb3ed9a42c1d78c Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 18 Dec 2025 11:21:19 -0500 Subject: [PATCH 07/24] Rename annotation tables to consistent nomenclature patterns --- ...rename_suggestion_tables_to_consistent_.py | 49 +++++++++++++++++++ .../suggestion/agency/subtask/sqlalchemy.py | 2 +- .../agency/suggestion/sqlalchemy.py | 4 +- .../models/impl/url/suggestion/agency/user.py | 3 +- .../suggestion/anonymous/agency/sqlalchemy.py | 2 +- .../anonymous/location/sqlalchemy.py | 2 +- .../anonymous/record_type/sqlalchemy.py | 2 +- .../anonymous/url_type/sqlalchemy.py | 2 +- .../location/auto/subtask/sqlalchemy.py | 2 +- .../location/auto/suggestion/sqlalchemy.py | 4 +- .../suggestion/location/user/sqlalchemy.py | 2 +- .../impl/url/suggestion/record_type/auto.py | 2 +- .../impl/url/suggestion/record_type/user.py | 2 +- 
.../suggestion/url_type/auto/sqlalchemy.py | 2 +- .../impl/url/suggestion/url_type/user.py | 2 +- src/db/models/views/url_anno_count.py | 16 +++--- src/db/models/views/url_annotations_flags.py | 10 ++-- 17 files changed, 78 insertions(+), 30 deletions(-) create mode 100644 alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py diff --git a/alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py b/alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py new file mode 100644 index 00000000..fe6718b2 --- /dev/null +++ b/alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py @@ -0,0 +1,49 @@ +"""Rename suggestion tables to consistent nomenclature + +Revision ID: 9292faed37fd +Revises: dfb64594049f +Create Date: 2025-12-18 09:51:20.074946 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = '9292faed37fd' +down_revision: Union[str, None] = 'dfb64594049f' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +OLD_NEW_TABLE_MAPPING = { + # Anonymous Suggestions + "anonymous_annotation_agency": "annotation__anon__agency", + "anonymous_annotation_location": "annotation__anon__location", + "anonymous_annotation_record_type": "annotation__anon__record_type", + "anonymous_annotation_url_type": "annotation__anon__url_type", + # User Suggestions + "user_url_agency_suggestions": "annotation__user__agency", + "user_location_suggestions": "annotation__user__location", + "user_record_type_suggestions": "annotation__user__record_type", + "user_url_type_suggestions": "annotation__user__url_type", + # Auto suggestions + "auto_location_id_subtasks": "annotation__auto__location__subtasks", + "location_id_subtask_suggestions": "annotation__auto__location__suggestions", + "url_auto_agency_id_subtasks": "annotation__auto__agency__subtasks", + "agency_id_subtask_suggestions": "annotation__auto__agency__suggestions", + "auto_record_type_suggestions": "annotation__auto__record_type", + "auto_relevant_suggestions": "annotation__auto__url_type" +} + +def upgrade() -> None: + for old_table_name, new_table_name in OLD_NEW_TABLE_MAPPING.items(): + op.rename_table( + old_table_name=old_table_name, + new_table_name=new_table_name + ) + + +def downgrade() -> None: + pass diff --git a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py b/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py index 7a297ef1..9fa3e5f5 100644 --- a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py @@ -14,7 +14,7 @@ class URLAutoAgencyIDSubtask( CreatedAtMixin ): - __tablename__ = "url_auto_agency_id_subtasks" + __tablename__ = "annotation__auto__agency__subtasks" type: Mapped[AutoAgencyIDSubtaskType] = enum_column( 
AutoAgencyIDSubtaskType, diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py b/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py index 3f8b8186..ff3748c6 100644 --- a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py @@ -10,12 +10,12 @@ class AgencyIDSubtaskSuggestion( CreatedAtMixin, AgencyDependentMixin, ): - __tablename__ = "agency_id_subtask_suggestions" + __tablename__ = "annotation__auto__agency__suggestions" subtask_id = sa.Column( sa.Integer, - sa.ForeignKey("url_auto_agency_id_subtasks.id"), + sa.ForeignKey("annotation__auto__agency__subtasks.id"), nullable=False ) confidence = sa.Column( diff --git a/src/db/models/impl/url/suggestion/agency/user.py b/src/db/models/impl/url/suggestion/agency/user.py index 79fa933c..c6154b16 100644 --- a/src/db/models/impl/url/suggestion/agency/user.py +++ b/src/db/models/impl/url/suggestion/agency/user.py @@ -4,11 +4,10 @@ from src.db.models.helpers import get_agency_id_foreign_column from src.db.models.mixins import URLDependentMixin from src.db.models.templates_.base import Base -from src.db.models.templates_.with_id import WithIDBase class UserURLAgencySuggestion(URLDependentMixin, Base): - __tablename__ = "user_url_agency_suggestions" + __tablename__ = "annotation__user__agency" __table_args__ = ( PrimaryKeyConstraint("agency_id", "url_id", "user_id"), ) diff --git a/src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py b/src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py index 6f750289..a99c92e8 100644 --- a/src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py @@ -11,7 +11,7 @@ class AnonymousAnnotationAgency( CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "anonymous_annotation_agency" + __tablename__ = "annotation__anon__agency" __table_args__ = ( 
PrimaryKeyConstraint("session_id", "url_id", "agency_id"), ) \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py b/src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py index 3e39810b..c44d76bd 100644 --- a/src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py @@ -12,7 +12,7 @@ class AnonymousAnnotationLocation( AnonymousSessionMixin ): - __tablename__ = "anonymous_annotation_location" + __tablename__ = "annotation__anon__location" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "location_id"), ) \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py b/src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py index 22f37839..67432bce 100644 --- a/src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py @@ -13,7 +13,7 @@ class AnonymousAnnotationRecordType( CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "anonymous_annotation_record_type" + __tablename__ = "annotation__anon__record_type" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "record_type"), ) diff --git a/src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py b/src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py index f0cbc6a7..87efb760 100644 --- a/src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py @@ -13,7 +13,7 @@ class AnonymousAnnotationURLType( CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "anonymous_annotation_url_type" + __tablename__ = "annotation__anon__url_type" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "url_type"), ) diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py 
b/src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py index b7412d1e..7d4e67bf 100644 --- a/src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py @@ -15,7 +15,7 @@ class AutoLocationIDSubtask( URLDependentMixin, ): - __tablename__ = 'auto_location_id_subtasks' + __tablename__ = 'annotation__auto__location__subtasks' locations_found = Column(Boolean(), nullable=False) type: Mapped[LocationIDSubtaskType] = enum_column( diff --git a/src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py b/src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py index 0d5ea926..650ee9a7 100644 --- a/src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py @@ -9,7 +9,7 @@ class LocationIDSubtaskSuggestion( Base, ): - __tablename__ = 'location_id_subtask_suggestions' + __tablename__ = 'annotation__auto__location__suggestions' __table_args__ = ( PrimaryKeyConstraint( 'subtask_id', @@ -19,7 +19,7 @@ class LocationIDSubtaskSuggestion( ) subtask_id = Column( Integer, - ForeignKey('auto_location_id_subtasks.id'), + ForeignKey('annotation__auto__location__subtasks.id'), nullable=False, primary_key=True, ) diff --git a/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py b/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py index 18ac3851..76883cfb 100644 --- a/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py @@ -12,7 +12,7 @@ class UserLocationSuggestion( LocationDependentMixin, URLDependentMixin ): - __tablename__ = 'user_location_suggestions' + __tablename__ = 'annotation__user__location' __table_args__ = ( PrimaryKeyConstraint('url_id', 'location_id', 'user_id'), ) diff --git a/src/db/models/impl/url/suggestion/record_type/auto.py 
b/src/db/models/impl/url/suggestion/record_type/auto.py index 1c2c68d1..39f345af 100644 --- a/src/db/models/impl/url/suggestion/record_type/auto.py +++ b/src/db/models/impl/url/suggestion/record_type/auto.py @@ -14,7 +14,7 @@ class AutoRecordTypeSuggestion( URLDependentMixin, Base, ): - __tablename__ = "auto_record_type_suggestions" + __tablename__ = "annotation__auto__record_type" record_type = Column(postgresql.ENUM(*record_type_values, name='record_type'), nullable=False) __table_args__ = ( diff --git a/src/db/models/impl/url/suggestion/record_type/user.py b/src/db/models/impl/url/suggestion/record_type/user.py index 4e271225..f238fca7 100644 --- a/src/db/models/impl/url/suggestion/record_type/user.py +++ b/src/db/models/impl/url/suggestion/record_type/user.py @@ -14,7 +14,7 @@ class UserRecordTypeSuggestion( URLDependentMixin, Base, ): - __tablename__ = "user_record_type_suggestions" + __tablename__ = "annotation__user__record_type" __table_args__ = ( PrimaryKeyConstraint("url_id", "user_id"), ) diff --git a/src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py b/src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py index 19b5dc09..7944ba5e 100644 --- a/src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py @@ -12,7 +12,7 @@ class AutoRelevantSuggestion( URLDependentMixin, Base, ): - __tablename__ = "auto_relevant_suggestions" + __tablename__ = "annotation__auto__url_type" relevant = Column(Boolean, nullable=True) confidence = Column(Float, nullable=True) diff --git a/src/db/models/impl/url/suggestion/url_type/user.py b/src/db/models/impl/url/suggestion/url_type/user.py index 52bbc4eb..896a6054 100644 --- a/src/db/models/impl/url/suggestion/url_type/user.py +++ b/src/db/models/impl/url/suggestion/url_type/user.py @@ -15,7 +15,7 @@ class UserURLTypeSuggestion( URLDependentMixin, Base, ): - __tablename__ = "user_url_type_suggestions" + __tablename__ = 
"annotation__user__url_type" __table_args__ = ( PrimaryKeyConstraint("url_id", "user_id"), ) diff --git a/src/db/models/views/url_anno_count.py b/src/db/models/views/url_anno_count.py index 2e910afb..f3909b39 100644 --- a/src/db/models/views/url_anno_count.py +++ b/src/db/models/views/url_anno_count.py @@ -5,7 +5,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.auto_location_id_subtasks anno on u.id = anno.url_id + inner join public.annotation__auto__location__subtasks anno on u.id = anno.url_id group by u.id ) , auto_agency_count as ( @@ -13,7 +13,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.url_auto_agency_id_subtasks anno on u.id = anno.url_id + inner join public.annotation__auto__agency__subtasks anno on u.id = anno.url_id group by u.id ) , auto_url_type_count as ( @@ -21,7 +21,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.auto_relevant_suggestions anno on u.id = anno.url_id + inner join public.annotation__auto__url_type anno on u.id = anno.url_id group by u.id ) , auto_record_type_count as ( @@ -29,7 +29,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.auto_record_type_suggestions anno on u.id = anno.url_id + inner join public.annotation__auto__record_type anno on u.id = anno.url_id group by u.id ) , user_location_count as ( @@ -37,7 +37,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.user_location_suggestions anno on u.id = anno.url_id + inner join public.annotation__user__location anno on u.id = anno.url_id group by u.id ) , user_agency_count as ( @@ -45,7 +45,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.user_url_agency_suggestions anno on u.id = anno.url_id + inner join public.annotation__user__agency anno on u.id = anno.url_id group by u.id ) , user_url_type_count as ( @@ -53,7 +53,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.user_url_type_suggestions anno on u.id = anno.url_id + inner join 
public.annotation__user__url_type anno on u.id = anno.url_id group by u.id ) , user_record_type_count as ( @@ -61,7 +61,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.user_record_type_suggestions anno on u.id = anno.url_id + inner join public.annotation__user__record_type anno on u.id = anno.url_id group by u.id ) select diff --git a/src/db/models/views/url_annotations_flags.py b/src/db/models/views/url_annotations_flags.py index c133fbfc..b194a5e0 100644 --- a/src/db/models/views/url_annotations_flags.py +++ b/src/db/models/views/url_annotations_flags.py @@ -11,12 +11,12 @@ CASE WHEN cua.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_confirmed_agency, CASE WHEN ruu.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS was_reviewed FROM urls u - LEFT JOIN public.auto_record_type_suggestions arts ON u.id = arts.url_id - LEFT JOIN public.auto_relevant_suggestions ars ON u.id = ars.url_id + LEFT JOIN public.annotation__auto__record_type arts ON u.id = arts.url_id + LEFT JOIN public.annotation__auto__url_type ars ON u.id = ars.url_id LEFT JOIN public.{URL_AUTO_AGENCY_SUGGESTIONS_TABLE_NAME} auas ON u.id = auas.url_id - LEFT JOIN public.user_record_type_suggestions urts ON u.id = urts.url_id - LEFT JOIN public.user_relevant_suggestions urs ON u.id = urs.url_id - LEFT JOIN public.user_url_agency_suggestions uuas ON u.id = uuas.url_id + LEFT JOIN public.annotation__user__record_type urts ON u.id = urts.url_id + LEFT JOIN public.annotation__user__url_type urs ON u.id = urs.url_id + LEFT JOIN public.annotation__user__agency uuas ON u.id = uuas.url_id LEFT JOIN public.reviewing_user_url ruu ON u.id = ruu.url_id LEFT JOIN public.link_agencies__urls cua on u.id = cua.url_id ) From b3600e9807117c5f0d408abb3dd71bd5a9ff0e30 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Fri, 19 Dec 2025 16:27:36 -0500 Subject: [PATCH 08/24] Change organization of annotations. 
--- ...rename_suggestion_tables_to_consistent_.py | 32 +++--- src/api/endpoints/annotate/_shared/extract.py | 4 +- .../all/get/queries/agency/requester.py | 32 +++--- .../annotate/all/get/queries/convert.py | 8 +- .../annotate/all/get/queries/core.py | 32 +++--- .../all/get/queries/location_/requester.py | 32 +++--- .../annotate/all/get/queries/name/core.py | 22 ++--- .../endpoints/annotate/all/post/requester.py | 26 ++--- .../endpoints/annotate/anonymous/get/query.py | 16 +-- .../annotate/anonymous/post/query.py | 27 +++-- .../contributions/shared/contributions.py | 6 +- .../user/queries/agreement/agency.py | 4 +- .../user/queries/agreement/record_type.py | 12 +-- .../user/queries/agreement/url_type.py | 12 +-- .../user/queries/annotated_and_validated.py | 10 +- .../metrics/urls/breakdown/query/core.py | 20 ++-- .../submit/data_source/queries/core.py | 22 ++--- src/api/endpoints/submit/url/queries/core.py | 20 ++-- .../operators/agency_identification/core.py | 2 +- .../agency_identification/subtasks/convert.py | 4 +- .../subtasks/flags/core.py | 2 +- .../subtasks/flags/mappings.py | 2 +- .../subtasks/impl/batch_link/core.py | 4 +- .../subtasks/impl/ckan_/core.py | 2 +- .../subtasks/impl/homepage_match_/convert.py | 6 +- .../subtasks/impl/homepage_match_/core.py | 4 +- .../impl/homepage_match_/models/entry.py | 2 +- .../queries/ctes/multi_agency_case.py | 4 +- .../queries/ctes/single_agency_case.py | 2 +- .../impl/homepage_match_/queries/get.py | 2 +- .../subtasks/impl/muckrock_/core.py | 4 +- .../impl/nlp_location_match_/convert.py | 4 +- .../impl/nlp_location_match_/query_/query.py | 18 ++-- .../agency_identification/subtasks/loader.py | 2 +- .../subtasks/models/subtask.py | 2 +- .../subtasks/queries/survey/constants.py | 2 +- .../subtasks/queries/survey/queries/core.py | 2 +- .../exists/high_confidence_annotations.py | 14 +-- .../survey/queries/ctes/subtask/helpers.py | 8 +- .../queries/ctes/subtask/impl/batch_link.py | 2 +- 
.../survey/queries/ctes/subtask/impl/ckan.py | 2 +- .../queries/ctes/subtask/impl/homepage.py | 2 +- .../queries/ctes/subtask/impl/muckrock.py | 2 +- .../queries/ctes/subtask/impl/nlp_location.py | 20 ++-- .../queries/survey/queries/eligible_counts.py | 2 +- .../subtasks/templates/subtask.py | 4 +- .../tasks/url/operators/auto_name/clean.py | 2 +- .../tasks/url/operators/auto_name/core.py | 4 +- .../url/operators/auto_name/queries/cte.py | 10 +- .../tasks/url/operators/auto_relevant/core.py | 3 +- .../operators/auto_relevant/queries/cte.py | 4 +- .../operators/auto_relevant/queries/get.py | 4 - .../tasks/url/operators/location_id/core.py | 2 +- .../location_id/subtasks/flags/core.py | 2 +- .../location_id/subtasks/flags/mappings.py | 2 +- .../subtasks/impl/batch_link/core.py | 4 +- .../nlp_location_freq/processor/convert.py | 4 +- .../operators/location_id/subtasks/loader.py | 2 +- .../location_id/subtasks/models/subtask.py | 2 +- .../subtasks/queries/survey/constants.py | 2 +- .../subtasks/queries/survey/queries/core.py | 2 +- .../exists/high_confidence_annotations.py | 14 +-- .../survey/queries/ctes/subtask/helpers.py | 8 +- .../queries/ctes/subtask/impl/batch_link.py | 2 +- .../ctes/subtask/impl/nlp_location_freq.py | 2 +- .../queries/survey/queries/eligible_counts.py | 2 +- .../location_id/subtasks/templates/subtask.py | 4 +- .../tasks/url/operators/record_type/core.py | 6 +- .../url/operators/record_type/queries/cte.py | 4 +- .../queries/ctes/counts/impl/agency.py | 20 ++-- .../queries/ctes/counts/impl/location.py | 21 ++-- .../validate/queries/ctes/counts/impl/name.py | 28 +++--- .../queries/ctes/counts/impl/record_type.py | 20 ++-- .../queries/ctes/counts/impl/url_type.py | 20 ++-- src/db/client/async_.py | 30 +++--- src/db/client/types.py | 8 +- src/db/constants.py | 12 +-- src/db/dto_converter.py | 24 ++--- src/db/models/impl/agency/sqlalchemy.py | 4 +- .../{url/suggestion => annotation}/README.md | 0 .../__init__.py | 0 .../agency}/__init__.py | 0 
.../agency/anon}/__init__.py | 0 .../agency/anon}/sqlalchemy.py | 4 +- .../agency/auto}/__init__.py | 0 .../agency/auto/subtask}/__init__.py | 0 .../agency/auto}/subtask/enum.py | 0 .../agency/auto}/subtask/pydantic.py | 6 +- .../agency/auto}/subtask/sqlalchemy.py | 8 +- .../agency/auto/suggestion}/__init__.py | 0 .../agency/auto}/suggestion/pydantic.py | 4 +- .../agency/auto}/suggestion/sqlalchemy.py | 6 +- .../agency/user}/__init__.py | 0 .../agency/user/sqlalchemy.py} | 4 +- .../location}/__init__.py | 0 .../location/anon}/__init__.py | 0 .../location/anon}/sqlalchemy.py | 4 +- .../location/auto}/__init__.py | 0 .../location/auto/subtask}/__init__.py | 0 .../location/auto/subtask/constants.py | 0 .../location/auto/subtask/enums.py | 0 .../location/auto/subtask/pydantic.py | 6 +- .../location/auto/subtask/sqlalchemy.py | 10 +- .../location/auto/suggestion}/__init__.py | 0 .../location/auto/suggestion/pydantic.py | 4 +- .../location/auto/suggestion/sqlalchemy.py | 6 +- .../location/user}/__init__.py | 0 .../location/user/pydantic.py | 4 +- .../location/user/sqlalchemy.py | 4 +- .../name}/__init__.py | 0 .../user => annotation/name/anon}/__init__.py | 0 .../name/anon/sqlalchemy.py} | 6 +- .../name/suggestion}/__init__.py | 0 .../name/suggestion}/enums.py | 0 .../annotation/name/suggestion/pydantic.py | 17 ++++ .../name/suggestion}/sqlalchemy.py | 8 +- .../name/user}/__init__.py | 0 .../name/user}/pydantic.py | 2 +- .../name/user}/sqlalchemy.py | 4 +- .../record_type}/__init__.py | 0 .../record_type/anon}/__init__.py | 0 .../record_type/anon}/sqlalchemy.py | 4 +- .../record_type/auto}/__init__.py | 0 .../record_type/auto/sqlalchemy.py} | 4 +- .../annotation/record_type/user/__init__.py | 0 .../record_type/user}/user.py | 4 +- .../impl/annotation/url_type/__init__.py | 0 .../impl/annotation/url_type/anon/__init__.py | 0 .../url_type/anon}/sqlalchemy.py | 4 +- .../impl/annotation/url_type/auto/__init__.py | 0 .../url_type/auto/pydantic/__init__.py | 0 
.../url_type/auto/pydantic/input.py | 0 .../url_type/auto/sqlalchemy.py | 4 +- .../impl/annotation/url_type/user/__init__.py | 0 .../url_type/user/sqlalchemy.py} | 4 +- src/db/models/impl/anon_session/__init__.py | 0 .../session => anon_session}/sqlalchemy.py | 0 src/db/models/impl/url/core/sqlalchemy.py | 31 +++--- .../impl/url/suggestion/anonymous/__init__.py | 1 - .../impl/url/suggestion/name/pydantic.py | 17 ---- .../implementations/anonymous_session.py | 2 +- .../common/annotation_exists_/constants.py | 24 ++--- .../core/metrics/urls/aggregated/pending.py | 12 +-- src/db/types.py | 8 +- .../api/annotate/all/test_happy_path.py | 22 ++--- .../api/annotate/anonymous/test_core.py | 26 ++--- .../api/submit/data_source/test_core.py | 20 ++-- .../api/submit/test_url_maximal.py | 28 +++--- .../api/url/by_id/delete/test_any_url.py | 98 +++++++++---------- .../integration/readonly/setup/annotations.py | 26 ++--- .../subtasks/batch_link/test_core.py | 12 +-- .../subtasks/ckan/test_core.py | 14 +-- .../homepage_match/test_happy_path.py | 22 ++--- .../subtasks/muckrock/test_core.py | 14 +-- .../end_to_end/test_multi_agency_location.py | 12 +-- .../end_to_end/test_single_agency_location.py | 14 +-- .../survey/test_survey_flag.py | 2 +- .../tasks/url/impl/auto_name/test_core.py | 8 +- .../tasks/url/impl/auto_relevant/test_task.py | 8 +- .../subtasks/batch_link/test_core.py | 12 +-- .../end_to_end/test_core.py | 12 +-- .../survey/test_survey_flag.py | 2 +- .../url/impl/test_url_record_type_task.py | 4 +- .../tasks/url/impl/validate/helper.py | 2 +- .../url/impl/validate/test_data_source.py | 22 ++--- .../impl/suggestion/auto/agency_/core.py | 7 +- .../commands/impl/suggestion/auto/relevant.py | 2 +- tests/helpers/data_creator/core.py | 24 ++--- 168 files changed, 683 insertions(+), 697 deletions(-) rename src/db/models/impl/{url/suggestion => annotation}/README.md (100%) rename src/db/models/impl/{link/user_name_suggestion => annotation}/__init__.py (100%) rename 
src/db/models/impl/{url/suggestion => annotation/agency}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/agency => annotation/agency/anon}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/anonymous/agency => annotation/agency/anon}/sqlalchemy.py (83%) rename src/db/models/impl/{url/suggestion/agency/subtask => annotation/agency/auto}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/agency/suggestion => annotation/agency/auto/subtask}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/agency => annotation/agency/auto}/subtask/enum.py (100%) rename src/db/models/impl/{url/suggestion/agency => annotation/agency/auto}/subtask/pydantic.py (61%) rename src/db/models/impl/{url/suggestion/agency => annotation/agency/auto}/subtask/sqlalchemy.py (76%) rename src/db/models/impl/{url/suggestion/anonymous/agency => annotation/agency/auto/suggestion}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/agency => annotation/agency/auto}/suggestion/pydantic.py (69%) rename src/db/models/impl/{url/suggestion/agency => annotation/agency/auto}/suggestion/sqlalchemy.py (77%) rename src/db/models/impl/{url/suggestion/anonymous/location => annotation/agency/user}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/agency/user.py => annotation/agency/user/sqlalchemy.py} (85%) rename src/db/models/impl/{url/suggestion/anonymous/record_type => annotation/location}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/anonymous/session => annotation/location/anon}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/anonymous/location => annotation/location/anon}/sqlalchemy.py (83%) rename src/db/models/impl/{url/suggestion/anonymous/url_type => annotation/location/auto}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/location => annotation/location/auto/subtask}/__init__.py (100%) rename src/db/models/impl/{url/suggestion => annotation}/location/auto/subtask/constants.py (100%) rename 
src/db/models/impl/{url/suggestion => annotation}/location/auto/subtask/enums.py (100%) rename src/db/models/impl/{url/suggestion => annotation}/location/auto/subtask/pydantic.py (60%) rename src/db/models/impl/{url/suggestion => annotation}/location/auto/subtask/sqlalchemy.py (63%) rename src/db/models/impl/{url/suggestion/location/auto => annotation/location/auto/suggestion}/__init__.py (100%) rename src/db/models/impl/{url/suggestion => annotation}/location/auto/suggestion/pydantic.py (68%) rename src/db/models/impl/{url/suggestion => annotation}/location/auto/suggestion/sqlalchemy.py (78%) rename src/db/models/impl/{url/suggestion/location/auto/subtask => annotation/location/user}/__init__.py (100%) rename src/db/models/impl/{url/suggestion => annotation}/location/user/pydantic.py (70%) rename src/db/models/impl/{url/suggestion => annotation}/location/user/sqlalchemy.py (88%) rename src/db/models/impl/{url/suggestion/location/auto/suggestion => annotation/name}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/location/user => annotation/name/anon}/__init__.py (100%) rename src/db/models/impl/{link/anonymous_sessions__name_suggestion.py => annotation/name/anon/sqlalchemy.py} (75%) rename src/db/models/impl/{url/suggestion/name => annotation/name/suggestion}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/name => annotation/name/suggestion}/enums.py (100%) create mode 100644 src/db/models/impl/annotation/name/suggestion/pydantic.py rename src/db/models/impl/{url/suggestion/name => annotation/name/suggestion}/sqlalchemy.py (65%) rename src/db/models/impl/{url/suggestion/record_type => annotation/name/user}/__init__.py (100%) rename src/db/models/impl/{link/user_name_suggestion => annotation/name/user}/pydantic.py (75%) rename src/db/models/impl/{link/user_name_suggestion => annotation/name/user}/sqlalchemy.py (78%) rename src/db/models/impl/{url/suggestion/url_type => annotation/record_type}/__init__.py (100%) rename 
src/db/models/impl/{url/suggestion/url_type/auto => annotation/record_type/anon}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/anonymous/record_type => annotation/record_type/anon}/sqlalchemy.py (87%) rename src/db/models/impl/{url/suggestion/url_type/auto/pydantic => annotation/record_type/auto}/__init__.py (100%) rename src/db/models/impl/{url/suggestion/record_type/auto.py => annotation/record_type/auto/sqlalchemy.py} (89%) create mode 100644 src/db/models/impl/annotation/record_type/user/__init__.py rename src/db/models/impl/{url/suggestion/record_type => annotation/record_type/user}/user.py (90%) create mode 100644 src/db/models/impl/annotation/url_type/__init__.py create mode 100644 src/db/models/impl/annotation/url_type/anon/__init__.py rename src/db/models/impl/{url/suggestion/anonymous/url_type => annotation/url_type/anon}/sqlalchemy.py (87%) create mode 100644 src/db/models/impl/annotation/url_type/auto/__init__.py create mode 100644 src/db/models/impl/annotation/url_type/auto/pydantic/__init__.py rename src/db/models/impl/{url/suggestion => annotation}/url_type/auto/pydantic/input.py (100%) rename src/db/models/impl/{url/suggestion => annotation}/url_type/auto/sqlalchemy.py (90%) create mode 100644 src/db/models/impl/annotation/url_type/user/__init__.py rename src/db/models/impl/{url/suggestion/url_type/user.py => annotation/url_type/user/sqlalchemy.py} (91%) create mode 100644 src/db/models/impl/anon_session/__init__.py rename src/db/models/impl/{url/suggestion/anonymous/session => anon_session}/sqlalchemy.py (100%) delete mode 100644 src/db/models/impl/url/suggestion/anonymous/__init__.py delete mode 100644 src/db/models/impl/url/suggestion/name/pydantic.py diff --git a/alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py b/alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py index fe6718b2..fabfe098 100644 --- 
a/alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py +++ b/alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py @@ -19,22 +19,26 @@ OLD_NEW_TABLE_MAPPING = { # Anonymous Suggestions - "anonymous_annotation_agency": "annotation__anon__agency", - "anonymous_annotation_location": "annotation__anon__location", - "anonymous_annotation_record_type": "annotation__anon__record_type", - "anonymous_annotation_url_type": "annotation__anon__url_type", + "anonymous_annotation_agency": "annotation__agency__anon", + "anonymous_annotation_location": "annotation__location__anon", + "anonymous_annotation_record_type": "annotation__record_type__anon", + "anonymous_annotation_url_type": "annotation__url_type__anon", # User Suggestions - "user_url_agency_suggestions": "annotation__user__agency", - "user_location_suggestions": "annotation__user__location", - "user_record_type_suggestions": "annotation__user__record_type", - "user_url_type_suggestions": "annotation__user__url_type", + "user_url_agency_suggestions": "annotation__agency__user", + "user_location_suggestions": "annotation__location__user", + "user_record_type_suggestions": "annotation__record_type__user", + "user_url_type_suggestions": "annotation__url_type__user", # Auto suggestions - "auto_location_id_subtasks": "annotation__auto__location__subtasks", - "location_id_subtask_suggestions": "annotation__auto__location__suggestions", - "url_auto_agency_id_subtasks": "annotation__auto__agency__subtasks", - "agency_id_subtask_suggestions": "annotation__auto__agency__suggestions", - "auto_record_type_suggestions": "annotation__auto__record_type", - "auto_relevant_suggestions": "annotation__auto__url_type" + "auto_location_id_subtasks": "annotation__location__auto__subtasks", + "location_id_subtask_suggestions": "annotation__location__auto__suggestions", + "url_auto_agency_id_subtasks": "annotation__agency__auto__subtasks", + "agency_id_subtask_suggestions": 
"annotation__agency__auto__suggestions", + "auto_record_type_suggestions": "annotation__record_type__auto", + "auto_relevant_suggestions": "annotation__url_type__auto", + # Name suggestions + "url_name_suggestions": "annotation__name__suggestions", + "link__anonymous_sessions__name_suggestions": "annotation__name__anon__endorsements", + "link_user_name_suggestions": "annotation__name__user__endorsements", } def upgrade() -> None: diff --git a/src/api/endpoints/annotate/_shared/extract.py b/src/api/endpoints/annotate/_shared/extract.py index 3fb7770b..12368cd6 100644 --- a/src/api/endpoints/annotate/_shared/extract.py +++ b/src/api/endpoints/annotate/_shared/extract.py @@ -16,8 +16,8 @@ from src.api.endpoints.annotate.all.get.queries.name.core import GetNameSuggestionsQueryBuilder from src.db.dto_converter import DTOConverter from src.db.dtos.url.mapping_.simple import SimpleURLMapping +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion async def extract_and_format_get_annotation_result( @@ -55,7 +55,7 @@ async def extract_and_format_get_annotation_result( batch_info=await GetAnnotationBatchInfoQueryBuilder( batch_id=batch_id, models=[ - UserURLAgencySuggestion, + AnnotationAgencyUser, ] ).run(session), location_suggestions=location_suggestions, diff --git a/src/api/endpoints/annotate/all/get/queries/agency/requester.py b/src/api/endpoints/annotate/all/get/queries/agency/requester.py index 9d933ae2..68d801b5 100644 --- a/src/api/endpoints/annotate/all/get/queries/agency/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/agency/requester.py @@ -8,10 +8,10 @@ from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import 
AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.templates.requester import RequesterBase @@ -36,10 +36,10 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: .where( or_( exists_url( - UserURLAgencySuggestion + AnnotationAgencyUser ), exists_url( - URLAutoAgencyIDSubtask + AnnotationAgencyAutoSubtask ) ) ) @@ -49,13 +49,13 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: # Number of users who suggested each agency user_suggestions_cte = ( select( - UserURLAgencySuggestion.url_id, - UserURLAgencySuggestion.agency_id, - func.count(UserURLAgencySuggestion.user_id).label('user_count') + AnnotationAgencyUser.url_id, + AnnotationAgencyUser.agency_id, + func.count(AnnotationAgencyUser.user_id).label('user_count') ) .group_by( - UserURLAgencySuggestion.agency_id, - UserURLAgencySuggestion.url_id, + AnnotationAgencyUser.agency_id, + AnnotationAgencyUser.url_id, ) .cte("user_suggestions") ) @@ -63,20 +63,20 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: # Maximum confidence of robo annotation, if any robo_suggestions_cte = ( select( - URLAutoAgencyIDSubtask.url_id, + AnnotationAgencyAutoSubtask.url_id, Agency.id.label("agency_id"), - func.max(AgencyIDSubtaskSuggestion.confidence).label('robo_confidence') + func.max(AnnotationAgencyAutoSuggestion.confidence).label('robo_confidence') ) .join( - AgencyIDSubtaskSuggestion, - 
AgencyIDSubtaskSuggestion.subtask_id == URLAutoAgencyIDSubtask.id + AnnotationAgencyAutoSuggestion, + AnnotationAgencyAutoSuggestion.subtask_id == AnnotationAgencyAutoSubtask.id ) .join( Agency, - Agency.id == AgencyIDSubtaskSuggestion.agency_id + Agency.id == AnnotationAgencyAutoSuggestion.agency_id ) .group_by( - URLAutoAgencyIDSubtask.url_id, + AnnotationAgencyAutoSubtask.url_id, Agency.id ) .cte("robo_suggestions") diff --git a/src/api/endpoints/annotate/all/get/queries/convert.py b/src/api/endpoints/annotate/all/get/queries/convert.py index fe9b0777..0b0f0791 100644 --- a/src/api/endpoints/annotate/all/get/queries/convert.py +++ b/src/api/endpoints/annotate/all/get/queries/convert.py @@ -5,12 +5,12 @@ from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType def convert_user_url_type_suggestion_to_url_type_annotation_suggestion( - db_suggestions: list[UserURLTypeSuggestion] + db_suggestions: list[AnnotationUserURLType] ) -> list[URLTypeAnnotationSuggestion]: counter: Counter[URLType] = Counter() for suggestion in db_suggestions: @@ -26,7 +26,7 @@ def convert_user_url_type_suggestion_to_url_type_annotation_suggestion( return anno_suggestions def convert_user_record_type_suggestion_to_record_type_annotation_suggestion( - db_suggestions: list[UserRecordTypeSuggestion] + db_suggestions: list[AnnotationUserRecordType] ) -> RecordTypeAnnotationResponseOuterInfo: counter: Counter[RecordType] = Counter() for suggestion in db_suggestions: diff --git 
a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index 89975a08..f3021bc4 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -5,13 +5,13 @@ from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.collectors.enums import URLStatus +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.models.views.unvalidated_url import UnvalidatedURL from src.db.models.views.url_anno_count import URLAnnotationCount from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView @@ -61,35 +61,35 @@ async def run( URL.status == URLStatus.OK.value, # Must not have been previously annotated by user ~exists( - select(UserURLTypeSuggestion.url_id) + select(AnnotationUserURLType.url_id) .where( - UserURLTypeSuggestion.url_id == URL.id, - UserURLTypeSuggestion.user_id == self.user_id, + AnnotationUserURLType.url_id == URL.id, + 
AnnotationUserURLType.user_id == self.user_id, ) ), ~exists( - select(UserURLAgencySuggestion.url_id) + select(AnnotationAgencyUser.url_id) .where( - UserURLAgencySuggestion.url_id == URL.id, - UserURLAgencySuggestion.user_id == self.user_id, + AnnotationAgencyUser.url_id == URL.id, + AnnotationAgencyUser.user_id == self.user_id, ) ), ~exists( select( - UserLocationSuggestion.url_id + AnnotationLocationUser.url_id ) .where( - UserLocationSuggestion.url_id == URL.id, - UserLocationSuggestion.user_id == self.user_id, + AnnotationLocationUser.url_id == URL.id, + AnnotationLocationUser.user_id == self.user_id, ) ), ~exists( select( - UserRecordTypeSuggestion.url_id + AnnotationUserRecordType.url_id ) .where( - UserRecordTypeSuggestion.url_id == URL.id, - UserRecordTypeSuggestion.user_id == self.user_id, + AnnotationUserRecordType.url_id == URL.id, + AnnotationUserRecordType.user_id == self.user_id, ) ), ~exists( diff --git a/src/api/endpoints/annotate/all/get/queries/location_/requester.py b/src/api/endpoints/annotate/all/get/queries/location_/requester.py index fad8e834..49f00f89 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/requester.py @@ -6,10 +6,10 @@ from src.api.endpoints.annotate.all.get.queries._shared.sort import sort_suggestions from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from 
src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.views.location_expanded import LocationExpandedView from src.db.templates.requester import RequesterBase @@ -25,10 +25,10 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: .where( or_( exists_url( - UserLocationSuggestion + AnnotationLocationUser ), exists_url( - AutoLocationIDSubtask + AnnotationLocationAutoSubtask ) ) ) @@ -37,34 +37,34 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: # Number of users who suggested each location user_suggestions_cte = ( select( - UserLocationSuggestion.url_id, - UserLocationSuggestion.location_id, - func.count(UserLocationSuggestion.user_id).label('user_count') + AnnotationLocationUser.url_id, + AnnotationLocationUser.location_id, + func.count(AnnotationLocationUser.user_id).label('user_count') ) .group_by( - UserLocationSuggestion.location_id, - UserLocationSuggestion.url_id, + AnnotationLocationUser.location_id, + AnnotationLocationUser.url_id, ) .cte("user_suggestions") ) # Maximum confidence of robo annotation, if any robo_suggestions_cte = ( select( - AutoLocationIDSubtask.url_id, + AnnotationLocationAutoSubtask.url_id, LocationExpandedView.id.label("location_id"), - func.max(LocationIDSubtaskSuggestion.confidence).label('robo_confidence') + func.max(AnnotationLocationAutoSuggestion.confidence).label('robo_confidence') ) .join( LocationExpandedView, - LocationExpandedView.id == LocationIDSubtaskSuggestion.location_id + LocationExpandedView.id == AnnotationLocationAutoSuggestion.location_id ) .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSubtask.id == AnnotationLocationAutoSuggestion.subtask_id ) .group_by( LocationExpandedView.id, - 
AutoLocationIDSubtask.url_id, + AnnotationLocationAutoSubtask.url_id, ) .cte("robo_suggestions") ) diff --git a/src/api/endpoints/annotate/all/get/queries/name/core.py b/src/api/endpoints/annotate/all/get/queries/name/core.py index 9438f14e..9eba70ee 100644 --- a/src/api/endpoints/annotate/all/get/queries/name/core.py +++ b/src/api/endpoints/annotate/all/get/queries/name/core.py @@ -5,9 +5,9 @@ from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion, NameAnnotationResponseOuterInfo from src.db.helpers.session import session_helper as sh -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion from src.db.queries.base.builder import QueryBuilderBase @@ -23,30 +23,30 @@ def __init__( async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: query = ( select( - URLNameSuggestion.id.label('id'), - URLNameSuggestion.suggestion.label('display_name'), + AnnotationNameSuggestion.id.label('id'), + AnnotationNameSuggestion.suggestion.label('display_name'), func.count( LinkUserNameSuggestion.user_id ).label('user_count'), case( - (URLNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1), + (AnnotationNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1), else_=0 ).label("robo_count") ) .outerjoin( LinkUserNameSuggestion, - LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id, + LinkUserNameSuggestion.suggestion_id == AnnotationNameSuggestion.id, ) .where( - URLNameSuggestion.url_id == self.url_id, + AnnotationNameSuggestion.url_id == 
self.url_id, ) .group_by( - URLNameSuggestion.id, - URLNameSuggestion.suggestion, + AnnotationNameSuggestion.id, + AnnotationNameSuggestion.suggestion, ) .order_by( func.count(LinkUserNameSuggestion.user_id).desc(), - URLNameSuggestion.id.asc(), + AnnotationNameSuggestion.id.asc(), ) .limit(3) ) diff --git a/src/api/endpoints/annotate/all/post/requester.py b/src/api/endpoints/annotate/all/post/requester.py index 8834ff76..2034ecc1 100644 --- a/src/api/endpoints/annotate/all/post/requester.py +++ b/src/api/endpoints/annotate/all/post/requester.py @@ -2,16 +2,16 @@ from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.core.enums import RecordType +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from 
src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.templates.requester import RequesterBase @@ -33,7 +33,7 @@ def optionally_add_record_type( ) -> None: if rt is None: return - record_type_suggestion = UserRecordTypeSuggestion( + record_type_suggestion = AnnotationUserRecordType( url_id=self.url_id, user_id=self.user_id, record_type=rt.value @@ -44,7 +44,7 @@ def add_relevant_annotation( self, url_type: URLType, ) -> None: - relevant_suggestion = UserURLTypeSuggestion( + relevant_suggestion = AnnotationUserURLType( url_id=self.url_id, user_id=self.user_id, type=url_type @@ -53,7 +53,7 @@ def add_relevant_annotation( def add_agency_ids(self, agency_ids: list[int]) -> None: for agency_id in agency_ids: - agency_suggestion = UserURLAgencySuggestion( + agency_suggestion = AnnotationAgencyUser( url_id=self.url_id, user_id=self.user_id, agency_id=agency_id, @@ -61,9 +61,9 @@ def add_agency_ids(self, agency_ids: list[int]) -> None: self.session.add(agency_suggestion) def add_location_ids(self, location_ids: list[int]) -> None: - locations: list[UserLocationSuggestion] = [] + locations: list[AnnotationLocationUser] = [] for location_id in location_ids: - locations.append(UserLocationSuggestion( + locations.append(AnnotationLocationUser( url_id=self.url_id, user_id=self.user_id, location_id=location_id @@ -83,7 +83,7 @@ async def optionally_add_name_suggestion( ) self.session.add(link) return - name_suggestion = URLNameSuggestion( + name_suggestion = AnnotationNameSuggestion( url_id=self.url_id, suggestion=name_info.new_name, source=NameSuggestionSource.USER diff --git a/src/api/endpoints/annotate/anonymous/get/query.py b/src/api/endpoints/annotate/anonymous/get/query.py index 041d5cda..ba179ca4 100644 --- a/src/api/endpoints/annotate/anonymous/get/query.py 
+++ b/src/api/endpoints/annotate/anonymous/get/query.py @@ -11,12 +11,12 @@ from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse from src.collectors.enums import URLStatus from src.db.helpers.query import not_exists_url +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType from src.db.models.views.unvalidated_url import UnvalidatedURL from src.db.models.views.url_anno_count import URLAnnotationCount from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView @@ -54,19 +54,19 @@ async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationRe # Must not have been previously annotated by user not_exists_anon_annotation( session_id=self.session_id, - anon_model=AnonymousAnnotationURLType + anon_model=AnnotationAnonURLType ), not_exists_anon_annotation( session_id=self.session_id, - anon_model=AnonymousAnnotationRecordType + anon_model=AnnotationAnonRecordType ), not_exists_anon_annotation( session_id=self.session_id, - anon_model=AnonymousAnnotationLocation + anon_model=AnnotationLocationAnon ), not_exists_anon_annotation( 
session_id=self.session_id, - anon_model=AnonymousAnnotationAgency + anon_model=AnnotationAgencyAnon ), ~exists( select( diff --git a/src/api/endpoints/annotate/anonymous/post/query.py b/src/api/endpoints/annotate/anonymous/post/query.py index 29670c80..50ebad7c 100644 --- a/src/api/endpoints/annotate/anonymous/post/query.py +++ b/src/api/endpoints/annotate/anonymous/post/query.py @@ -3,13 +3,13 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.db.models.impl.link.anonymous_sessions__name_suggestion import LinkAnonymousSessionNameSuggestion -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.queries.base.builder import QueryBuilderBase @@ -27,7 +27,7 @@ def __init__( async def run(self, session: AsyncSession) -> 
None: - url_type_suggestion = AnonymousAnnotationURLType( + url_type_suggestion = AnnotationAnonURLType( url_id=self.url_id, url_type=self.post_info.suggested_status, session_id=self.session_id @@ -36,7 +36,7 @@ async def run(self, session: AsyncSession) -> None: name_id: int | None if self.post_info.name_info.new_name is not None: - name_suggestion = URLNameSuggestion( + name_suggestion = AnnotationNameSuggestion( url_id=self.url_id, suggestion=self.post_info.name_info.new_name, source=NameSuggestionSource.USER @@ -50,14 +50,14 @@ async def run(self, session: AsyncSession) -> None: name_id = None if name_id is not None: - name_suggestion = LinkAnonymousSessionNameSuggestion( + name_suggestion = AnnotationNameAnonEndorsement( suggestion_id=name_id, session_id=self.session_id ) session.add(name_suggestion) if self.post_info.record_type is not None: - record_type_suggestion = AnonymousAnnotationRecordType( + record_type_suggestion = AnnotationAnonRecordType( url_id=self.url_id, record_type=self.post_info.record_type, session_id=self.session_id @@ -66,7 +66,7 @@ async def run(self, session: AsyncSession) -> None: if len(self.post_info.location_info.location_ids) != 0: location_suggestions = [ - AnonymousAnnotationLocation( + AnnotationLocationAnon( url_id=self.url_id, location_id=location_id, session_id=self.session_id @@ -77,7 +77,7 @@ async def run(self, session: AsyncSession) -> None: if len(self.post_info.agency_info.agency_ids) != 0: agency_suggestions = [ - AnonymousAnnotationAgency( + AnnotationAgencyAnon( url_id=self.url_id, agency_id=agency_id, session_id=self.session_id @@ -86,4 +86,3 @@ async def run(self, session: AsyncSession) -> None: ] session.add_all(agency_suggestions) - # Ignore Name suggestions \ No newline at end of file diff --git a/src/api/endpoints/contributions/shared/contributions.py b/src/api/endpoints/contributions/shared/contributions.py index ae72fc00..e62c0e7f 100644 --- a/src/api/endpoints/contributions/shared/contributions.py +++ 
b/src/api/endpoints/contributions/shared/contributions.py @@ -1,6 +1,6 @@ from sqlalchemy import select, func, CTE, Column -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType class ContributionsCTEContainer: @@ -8,11 +8,11 @@ class ContributionsCTEContainer: def __init__(self): self._cte = ( select( - UserURLTypeSuggestion.user_id, + AnnotationUserURLType.user_id, func.count().label("count") ) .group_by( - UserURLTypeSuggestion.user_id + AnnotationUserURLType.user_id ) .cte("contributions") ) diff --git a/src/api/endpoints/contributions/user/queries/agreement/agency.py b/src/api/endpoints/contributions/user/queries/agreement/agency.py index 01000bf2..c1dfeed7 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/agency.py +++ b/src/api/endpoints/contributions/user/queries/agreement/agency.py @@ -1,14 +1,14 @@ from sqlalchemy import select, func, exists, and_, or_, any_, cast, Float from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion def get_agency_agreement_cte_container() -> AgreementCTEContainer: - uuas = UserURLAgencySuggestion + uuas = AnnotationAgencyUser fuv = FlagURLValidated lau = LinkURLAgency # CTE 1: All validated Meta URLs/Data Sources and their agencies diff --git a/src/api/endpoints/contributions/user/queries/agreement/record_type.py b/src/api/endpoints/contributions/user/queries/agreement/record_type.py index 2cde5ab5..278c4c60 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/record_type.py +++ 
b/src/api/endpoints/contributions/user/queries/agreement/record_type.py @@ -3,7 +3,7 @@ from src.api.endpoints.contributions.user.queries.annotated_and_validated import AnnotatedAndValidatedCTEContainer from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType def get_record_type_agreement_cte_container( @@ -16,8 +16,8 @@ def get_record_type_agreement_cte_container( func.count() ) .join( - UserRecordTypeSuggestion, - UserRecordTypeSuggestion.url_id == inner_cte.url_id + AnnotationUserRecordType, + AnnotationUserRecordType.url_id == inner_cte.url_id ) .group_by( inner_cte.user_id @@ -31,14 +31,14 @@ def get_record_type_agreement_cte_container( func.count() ) .join( - UserRecordTypeSuggestion, - UserRecordTypeSuggestion.url_id == inner_cte.url_id + AnnotationUserRecordType, + AnnotationUserRecordType.url_id == inner_cte.url_id ) .join( URLRecordType, and_( URLRecordType.url_id == inner_cte.url_id, - URLRecordType.record_type == UserRecordTypeSuggestion.record_type + URLRecordType.record_type == AnnotationUserRecordType.record_type ) ) .group_by( diff --git a/src/api/endpoints/contributions/user/queries/agreement/url_type.py b/src/api/endpoints/contributions/user/queries/agreement/url_type.py index 12feb834..57a2a5a1 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/url_type.py +++ b/src/api/endpoints/contributions/user/queries/agreement/url_type.py @@ -3,7 +3,7 @@ from src.api.endpoints.contributions.user.queries.annotated_and_validated import AnnotatedAndValidatedCTEContainer from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from 
src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType def get_url_type_agreement_cte_container( @@ -17,8 +17,8 @@ def get_url_type_agreement_cte_container( func.count() ) .join( - UserURLTypeSuggestion, - UserURLTypeSuggestion.url_id == inner_cte.url_id + AnnotationUserURLType, + AnnotationUserURLType.url_id == inner_cte.url_id ) .join( FlagURLValidated, @@ -36,14 +36,14 @@ def get_url_type_agreement_cte_container( func.count() ) .join( - UserURLTypeSuggestion, - UserURLTypeSuggestion.url_id == inner_cte.url_id + AnnotationUserURLType, + AnnotationUserURLType.url_id == inner_cte.url_id ) .join( FlagURLValidated, and_( FlagURLValidated.url_id == inner_cte.url_id, - UserURLTypeSuggestion.type == FlagURLValidated.type + AnnotationUserURLType.type == FlagURLValidated.type ) ) diff --git a/src/api/endpoints/contributions/user/queries/annotated_and_validated.py b/src/api/endpoints/contributions/user/queries/annotated_and_validated.py index 9c7c48f6..1be14e28 100644 --- a/src/api/endpoints/contributions/user/queries/annotated_and_validated.py +++ b/src/api/endpoints/contributions/user/queries/annotated_and_validated.py @@ -1,7 +1,7 @@ from sqlalchemy import select, Column, CTE from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType class AnnotatedAndValidatedCTEContainer: @@ -9,16 +9,16 @@ class AnnotatedAndValidatedCTEContainer: def __init__(self, user_id: int | None): self._cte = ( select( - UserURLTypeSuggestion.user_id, - UserURLTypeSuggestion.url_id + AnnotationUserURLType.user_id, + AnnotationUserURLType.url_id ) .join( FlagURLValidated, - FlagURLValidated.url_id == UserURLTypeSuggestion.url_id + FlagURLValidated.url_id == AnnotationUserURLType.url_id ) ) if 
user_id is not None: - self._cte = self._cte.where(UserURLTypeSuggestion.user_id == user_id) + self._cte = self._cte.where(AnnotationUserURLType.user_id == user_id) self._cte = self._cte.cte("annotated_and_validated") @property diff --git a/src/api/endpoints/metrics/urls/breakdown/query/core.py b/src/api/endpoints/metrics/urls/breakdown/query/core.py index bccc7d68..949c8abd 100644 --- a/src/api/endpoints/metrics/urls/breakdown/query/core.py +++ b/src/api/endpoints/metrics/urls/breakdown/query/core.py @@ -1,16 +1,14 @@ -from typing import Any - from sqlalchemy import select, case, literal, func from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseInnerDTO, \ GetMetricsURLsBreakdownPendingResponseDTO from src.collectors.enums import URLStatus +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.queries.base.builder import QueryBuilderBase @@ -21,19 +19,19 @@ async def run(self, session: AsyncSession) -> GetMetricsURLsBreakdownPendingResp flags = ( select( URL.id.label("url_id"), - case((UserRecordTypeSuggestion.url_id != None, literal(True)), else_=literal(False)).label( + case((AnnotationUserRecordType.url_id != None, literal(True)), else_=literal(False)).label( "has_user_record_type_annotation" ), - case((UserURLTypeSuggestion.url_id != None, literal(True)), 
else_=literal(False)).label( + case((AnnotationUserURLType.url_id != None, literal(True)), else_=literal(False)).label( "has_user_relevant_annotation" ), - case((UserURLAgencySuggestion.url_id != None, literal(True)), else_=literal(False)).label( + case((AnnotationAgencyUser.url_id != None, literal(True)), else_=literal(False)).label( "has_user_agency_annotation" ), ) - .outerjoin(UserRecordTypeSuggestion, URL.id == UserRecordTypeSuggestion.url_id) - .outerjoin(UserURLTypeSuggestion, URL.id == UserURLTypeSuggestion.url_id) - .outerjoin(UserURLAgencySuggestion, URL.id == UserURLAgencySuggestion.url_id) + .outerjoin(AnnotationUserRecordType, URL.id == AnnotationUserRecordType.url_id) + .outerjoin(AnnotationUserURLType, URL.id == AnnotationUserURLType.url_id) + .outerjoin(AnnotationAgencyUser, URL.id == AnnotationAgencyUser.url_id) ).cte("flags") month = func.date_trunc('month', URL.created_at) diff --git a/src/api/endpoints/submit/data_source/queries/core.py b/src/api/endpoints/submit/data_source/queries/core.py index 1f97cd11..17233386 100644 --- a/src/api/endpoints/submit/data_source/queries/core.py +++ b/src/api/endpoints/submit/data_source/queries/core.py @@ -8,18 +8,18 @@ from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest from src.collectors.enums import URLStatus from src.core.enums import BatchStatus +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType from src.db.models.impl.batch.sqlalchemy import Batch from 
src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder from src.util.models.full_url import FullURL @@ -75,7 +75,7 @@ async def run( session_id: uuid.UUID = await MakeAnonymousSessionQueryBuilder().run(session=session) # Add URL Type Suggestion - url_type_suggestion = AnonymousAnnotationURLType( + url_type_suggestion = AnnotationAnonURLType( url_id=url_id, url_type=URLType.DATA_SOURCE, session_id=session_id @@ -84,7 +84,7 @@ async def run( # Optionally add Record Type as suggestion if self.request.record_type is not None: - record_type_suggestion = AnonymousAnnotationRecordType( + record_type_suggestion = AnnotationAnonRecordType( url_id=url_id, record_type=self.request.record_type.value, session_id=session_id @@ -94,7 +94,7 @@ async def run( # Optionally add Agency ID suggestions if self.request.agency_ids is not None: agency_id_suggestions = [ - AnonymousAnnotationAgency( + AnnotationAgencyAnon( url_id=url_id, agency_id=agency_id, session_id=session_id @@ -106,7 +106,7 
@@ async def run( # Optionally add Location ID suggestions if self.request.location_ids is not None: location_id_suggestions = [ - AnonymousAnnotationLocation( + AnnotationLocationAnon( url_id=url_id, location_id=location_id, session_id=session_id @@ -117,7 +117,7 @@ async def run( # Optionally add name suggestion if self.request.name is not None: - name_suggestion = URLNameSuggestion( + name_suggestion = AnnotationNameSuggestion( url_id=url_id, suggestion=self.request.name, source=NameSuggestionSource.USER diff --git a/src/api/endpoints/submit/url/queries/core.py b/src/api/endpoints/submit/url/queries/core.py index 0d2c1c84..ccbbc2c4 100644 --- a/src/api/endpoints/submit/url/queries/core.py +++ b/src/api/endpoints/submit/url/queries/core.py @@ -8,15 +8,15 @@ convert_duplicate_urls_to_url_response from src.api.endpoints.submit.url.queries.dedupe import DeduplicateURLQueryBuilder from src.collectors.enums import URLStatus -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from 
src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType from src.db.queries.base.builder import QueryBuilderBase from src.util.models.url_and_scheme import URLAndScheme from src.util.url import clean_url, get_url_and_scheme, is_valid_url @@ -77,7 +77,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add record type as suggestion if exists if self.request.record_type is not None: - rec_sugg = UserRecordTypeSuggestion( + rec_sugg = AnnotationUserRecordType( user_id=self.user_id, url_id=url_insert.id, record_type=self.request.record_type.value @@ -86,7 +86,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add name as suggestion if exists if self.request.name is not None: - name_sugg = URLNameSuggestion( + name_sugg = AnnotationNameSuggestion( url_id=url_insert.id, suggestion=self.request.name, source=NameSuggestionSource.USER @@ -104,7 +104,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add location ID as suggestion if exists if self.request.location_id is not None: - loc_sugg = UserLocationSuggestion( + loc_sugg = AnnotationLocationUser( user_id=self.user_id, url_id=url_insert.id, location_id=self.request.location_id @@ -113,7 +113,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add agency ID as suggestion if exists if self.request.agency_id is not None: - agen_sugg = UserURLAgencySuggestion( + agen_sugg = AnnotationAgencyUser( user_id=self.user_id, url_id=url_insert.id, agency_id=self.request.agency_id diff --git a/src/core/tasks/url/operators/agency_identification/core.py b/src/core/tasks/url/operators/agency_identification/core.py index 7657ea0e..536e4fec 100644 --- a/src/core/tasks/url/operators/agency_identification/core.py +++ 
b/src/core/tasks/url/operators/agency_identification/core.py @@ -9,7 +9,7 @@ from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType class AgencyIdentificationTaskOperator( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/convert.py b/src/core/tasks/url/operators/agency_identification/subtasks/convert.py index 5cead5d3..a7d4735d 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/convert.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/convert.py @@ -1,7 +1,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic def convert_agency_suggestions_to_subtask_data( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py index 41997322..4eaeaeaa 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py @@ -2,7 +2,7 @@ from environs import Env from src.core.tasks.url.operators.agency_identification.subtasks.flags.mappings import SUBTASK_TO_ENV_FLAG -from 
src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType class SubtaskFlagger: diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py b/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py index dcc0b60c..cc45b123 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType SUBTASK_TO_ENV_FLAG: dict[AutoAgencyIDSubtaskType, str] = { AutoAgencyIDSubtaskType.HOMEPAGE_MATCH: "AGENCY_ID_HOMEPAGE_MATCH_FLAG", diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/batch_link/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/batch_link/core.py index 9e15996f..83d4d11a 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/batch_link/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/batch_link/core.py @@ -6,8 +6,8 @@ from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic class 
AgencyBatchLinkSubtaskOperator(AgencyIDSubtaskOperatorBase): diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py index 2603191a..275bb3c6 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py @@ -13,7 +13,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import \ AgencyIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.external.pdap.client import PDAPClient diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py index f4ba913e..186ed9ca 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py @@ -2,9 +2,9 @@ GetHomepageMatchParams from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.models.mapping import \ SubtaskURLMapping -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode, AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic -from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode, AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from 
src.db.models.impl.annotation.agency.auto.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic def convert_params_to_subtask_entries( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py index f335cb3a..d072aa6d 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py @@ -7,8 +7,8 @@ from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.get import \ GetHomepageMatchSubtaskURLsQueryBuilder from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic -from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic class HomepageMatchSubtaskOperator( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py index 6c65f9ad..989e1a7b 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py @@ -1,6 +1,6 @@ from pydantic import BaseModel, Field -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode class 
GetHomepageMatchParams(BaseModel): diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py index edf9e601..9c1fca04 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py @@ -1,8 +1,8 @@ -from sqlalchemy import CTE, select, literal +from sqlalchemy import select, literal from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.consolidated import \ CONSOLIDATED_CTE -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode MULTI_AGENCY_CASE_QUERY = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py index 5778ecb6..31638d31 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py @@ -2,7 +2,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.consolidated import \ CONSOLIDATED_CTE -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode SINGLE_AGENCY_CASE_QUERY = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py index 10619531..05f7dd81 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py @@ -10,7 +10,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.single_agency_case import \ SINGLE_AGENCY_CASE_QUERY from src.db.helpers.session import session_helper as sh -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py index 030139ad..dd77b94e 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py @@ -16,8 +16,8 @@ from src.core.tasks.url.operators.agency_identification.subtasks.queries.match_agency import MatchAgencyQueryBuilder from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic from src.external.pdap.client import PDAPClient diff --git 
a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/convert.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/convert.py index 2766bff0..a3b8bb0f 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/convert.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/convert.py @@ -2,8 +2,8 @@ NLPLocationMatchSubtaskInput from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic def convert_location_agency_mappings_to_subtask_data_list( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query_/query.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query_/query.py index f0dcac94..94eb48aa 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query_/query.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query_/query.py @@ -8,9 +8,9 @@ NLPLocationMatchSubtaskInput, LocationAnnotationToAgencyIDMapping, LocationAnnotation from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.impl.nlp_location import \ NLP_LOCATION_CONTAINER +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from 
src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion from src.db.queries.base.builder import QueryBuilderBase from src.db.helpers.session import session_helper as sh @@ -21,21 +21,21 @@ async def run(self, session: AsyncSession) -> list[NLPLocationMatchSubtaskInput] query = ( select( NLP_LOCATION_CONTAINER.url_id, - LocationIDSubtaskSuggestion.location_id, - LocationIDSubtaskSuggestion.confidence, + AnnotationLocationAutoSuggestion.location_id, + AnnotationLocationAutoSuggestion.confidence, LinkAgencyLocation.agency_id, ) .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.url_id == NLP_LOCATION_CONTAINER.url_id + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSubtask.url_id == NLP_LOCATION_CONTAINER.url_id ) .join( - LocationIDSubtaskSuggestion, - LocationIDSubtaskSuggestion.subtask_id == AutoLocationIDSubtask.id + AnnotationLocationAutoSuggestion, + AnnotationLocationAutoSuggestion.subtask_id == AnnotationLocationAutoSubtask.id ) .join( LinkAgencyLocation, - LinkAgencyLocation.location_id == LocationIDSubtaskSuggestion.location_id + LinkAgencyLocation.location_id == AnnotationLocationAutoSuggestion.location_id ) .where( ~NLP_LOCATION_CONTAINER.entry_exists diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/loader.py b/src/core/tasks/url/operators/agency_identification/subtasks/loader.py index 24099540..fd14d34e 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/loader.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/loader.py @@ -10,7 +10,7 @@ NLPLocationMatchSubtaskOperator from 
src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.external.pdap.client import PDAPClient diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/models/subtask.py b/src/core/tasks/url/operators/agency_identification/subtasks/models/subtask.py index 7da0a8f5..99f7b2d9 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/models/subtask.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/models/subtask.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic class AutoAgencyIDSubtaskData(BaseModel): diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py index bea99266..38a8b44c 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType # Determines priority of subtasks, all else being equal. 
SUBTASK_HIERARCHY: list[AutoAgencyIDSubtaskType] = [ diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py index 2b81d2de..ef90db7f 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py @@ -7,7 +7,7 @@ SUBTASK_HIERARCHY_MAPPING from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.eligible_counts import \ ELIGIBLE_COUNTS_QUERY -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.queries.base.builder import QueryBuilderBase from src.db.helpers.session import session_helper as sh diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py index cfb92327..4c5aaa78 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py @@ -2,24 +2,24 @@ from src.core.tasks.url.operators._shared.container.subtask.exists import \ URLsSubtaskExistsCTEContainer +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask 
-from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion cte = ( select( URL.id ) .join( - URLAutoAgencyIDSubtask, - URLAutoAgencyIDSubtask.url_id == URL.id, + AnnotationAgencyAutoSubtask, + AnnotationAgencyAutoSubtask.url_id == URL.id, ) .join( - AgencyIDSubtaskSuggestion, - AgencyIDSubtaskSuggestion.subtask_id == URLAutoAgencyIDSubtask.id, + AnnotationAgencyAutoSuggestion, + AnnotationAgencyAutoSuggestion.subtask_id == AnnotationAgencyAutoSubtask.id, ) .where( - AgencyIDSubtaskSuggestion.confidence >= 95, + AnnotationAgencyAutoSuggestion.confidence >= 95, ) .cte("high_confidence_annotations_exists") ) diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py index b06442ea..7f4aff78 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py @@ -1,8 +1,8 @@ from sqlalchemy import ColumnElement, exists +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask def get_exists_subtask_query( @@ -11,8 +11,8 @@ def get_exists_subtask_query( return ( exists() .where( - URLAutoAgencyIDSubtask.url_id == URL.id, - URLAutoAgencyIDSubtask.type == subtask_type, + AnnotationAgencyAutoSubtask.url_id == URL.id, + AnnotationAgencyAutoSubtask.type == subtask_type, ) .label("subtask_entry_exists") ) \ No newline at end of file 
diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py index 42fcc02f..167262b8 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py @@ -3,10 +3,10 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.models.impl.link.agency_batch.sqlalchemy import LinkAgencyBatch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py index 6b8ed9e8..052a5fb3 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py @@ -4,10 +4,10 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from 
src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py index 7daba916..7cc9a065 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py @@ -5,8 +5,8 @@ CONSOLIDATED_CTE from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType VALID_URL_FLAG = ( exists() diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py index 9e267f66..5a83e029 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py @@ -4,10 +4,10 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from 
src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py index 7a15b67a..fb22379d 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py @@ -5,11 +5,11 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from 
src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion cte = ( select( @@ -19,10 +19,10 @@ ) ) .join( - AutoLocationIDSubtask, + AnnotationLocationAutoSubtask, and_( - AutoLocationIDSubtask.url_id == URL.id, - AutoLocationIDSubtask.locations_found + AnnotationLocationAutoSubtask.url_id == URL.id, + AnnotationLocationAutoSubtask.locations_found ) ) .where( @@ -32,12 +32,12 @@ LinkAgencyLocation.location_id ) .join( - LocationIDSubtaskSuggestion, - LocationIDSubtaskSuggestion.location_id == LinkAgencyLocation.location_id, + AnnotationLocationAutoSuggestion, + AnnotationLocationAutoSuggestion.location_id == LinkAgencyLocation.location_id, ) .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id, + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSubtask.id == AnnotationLocationAutoSuggestion.subtask_id, ) ) diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py index d3b7fe6b..79067aae 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py @@ -2,7 +2,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.eligible import \ EligibleContainer -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType def sum_count(col: ColumnElement[bool], subtask_type: AutoAgencyIDSubtaskType) -> ColumnElement[int]: diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py b/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py index 
9335afcf..d88933eb 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py @@ -6,8 +6,8 @@ from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic -from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall diff --git a/src/core/tasks/url/operators/auto_name/clean.py b/src/core/tasks/url/operators/auto_name/clean.py index 2e1820ab..9c745829 100644 --- a/src/core/tasks/url/operators/auto_name/clean.py +++ b/src/core/tasks/url/operators/auto_name/clean.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH +from src.db.models.impl.annotation.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH def clean_title(title: str) -> str: diff --git a/src/core/tasks/url/operators/auto_name/core.py b/src/core/tasks/url/operators/auto_name/core.py index 00af9838..b5702008 100644 --- a/src/core/tasks/url/operators/auto_name/core.py +++ b/src/core/tasks/url/operators/auto_name/core.py @@ -4,8 +4,8 @@ from src.core.tasks.url.operators.auto_name.queries.prereq import AutoNamePrerequisitesQueryBuilder from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.enums import TaskType -from 
src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.pydantic import URLNameSuggestionPydantic +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.pydantic import URLNameSuggestionPydantic class AutoNameURLTaskOperator(URLTaskOperatorBase): diff --git a/src/core/tasks/url/operators/auto_name/queries/cte.py b/src/core/tasks/url/operators/auto_name/queries/cte.py index 1c7fc503..ff8a958b 100644 --- a/src/core/tasks/url/operators/auto_name/queries/cte.py +++ b/src/core/tasks/url/operators/auto_name/queries/cte.py @@ -2,10 +2,10 @@ from src.db.enums import URLHTMLContentType, TaskType from src.db.helpers.query import no_url_task_error +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion class AutoNamePrerequisiteCTEContainer: @@ -24,11 +24,11 @@ def __init__(self): URLHTMLContent.content_type == URLHTMLContentType.TITLE.value, ~exists( select( - URLNameSuggestion.id + AnnotationNameSuggestion.id ) .where( - URLNameSuggestion.url_id == URL.id, - URLNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE.value, + AnnotationNameSuggestion.url_id == URL.id, + AnnotationNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE.value, ) ), no_url_task_error(TaskType.AUTO_NAME) diff --git a/src/core/tasks/url/operators/auto_relevant/core.py b/src/core/tasks/url/operators/auto_relevant/core.py index 3acff217..ea2a80d4 100644 --- a/src/core/tasks/url/operators/auto_relevant/core.py +++ 
b/src/core/tasks/url/operators/auto_relevant/core.py @@ -5,9 +5,8 @@ from src.core.tasks.url.operators.auto_relevant.sort import separate_success_and_error_subsets from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput from src.db.enums import TaskType -from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic +from src.db.models.impl.annotation.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall from src.external.huggingface.inference.client import HuggingFaceInferenceClient from src.external.huggingface.inference.models.input import BasicInput diff --git a/src/core/tasks/url/operators/auto_relevant/queries/cte.py b/src/core/tasks/url/operators/auto_relevant/queries/cte.py index c8b816fd..354e4bd5 100644 --- a/src/core/tasks/url/operators/auto_relevant/queries/cte.py +++ b/src/core/tasks/url/operators/auto_relevant/queries/cte.py @@ -6,7 +6,7 @@ from src.db.helpers.query import not_exists_url, no_url_task_error from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType class AutoRelevantPrerequisitesCTEContainer: @@ -22,7 +22,7 @@ def __init__(self): ) .where( URL.status == URLStatus.OK.value, - not_exists_url(AutoRelevantSuggestion), + not_exists_url(AnnotationAutoURLType), no_url_task_error(TaskType.RELEVANCY) ).cte("auto_relevant_prerequisites") ) diff --git a/src/core/tasks/url/operators/auto_relevant/queries/get.py b/src/core/tasks/url/operators/auto_relevant/queries/get.py index b566bb42..1ed115fa 100644 --- 
a/src/core/tasks/url/operators/auto_relevant/queries/get.py +++ b/src/core/tasks/url/operators/auto_relevant/queries/get.py @@ -4,14 +4,10 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import selectinload -from src.collectors.enums import URLStatus from src.core.tasks.url.operators.auto_relevant.models.tdo import URLRelevantTDO from src.core.tasks.url.operators.auto_relevant.queries.cte import AutoRelevantPrerequisitesCTEContainer -from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import StatementComposer from src.db.utils.compression import decompress_html diff --git a/src/core/tasks/url/operators/location_id/core.py b/src/core/tasks/url/operators/location_id/core.py index 3833a80c..82f7df13 100644 --- a/src/core/tasks/url/operators/location_id/core.py +++ b/src/core/tasks/url/operators/location_id/core.py @@ -8,7 +8,7 @@ from src.core.tasks.url.operators.location_id.subtasks.templates.subtask import LocationIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType class LocationIdentificationTaskOperator( diff --git a/src/core/tasks/url/operators/location_id/subtasks/flags/core.py b/src/core/tasks/url/operators/location_id/subtasks/flags/core.py index 1b6cb55c..21765643 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/flags/core.py +++ b/src/core/tasks/url/operators/location_id/subtasks/flags/core.py @@ -1,7 +1,7 @@ from environs import Env from src.core.tasks.url.operators.location_id.subtasks.flags.mappings import 
SUBTASK_TO_ENV_FLAG -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType class SubtaskFlagger: diff --git a/src/core/tasks/url/operators/location_id/subtasks/flags/mappings.py b/src/core/tasks/url/operators/location_id/subtasks/flags/mappings.py index 48f5d194..548c4f7b 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/flags/mappings.py +++ b/src/core/tasks/url/operators/location_id/subtasks/flags/mappings.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType SUBTASK_TO_ENV_FLAG: dict[LocationIDSubtaskType, str] = { LocationIDSubtaskType.NLP_LOCATION_FREQUENCY: "LOCATION_ID_NLP_LOCATION_MATCH_FLAG", diff --git a/src/core/tasks/url/operators/location_id/subtasks/impl/batch_link/core.py b/src/core/tasks/url/operators/location_id/subtasks/impl/batch_link/core.py index a85e572a..59a7faf8 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/impl/batch_link/core.py +++ b/src/core/tasks/url/operators/location_id/subtasks/impl/batch_link/core.py @@ -5,8 +5,8 @@ from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion from src.core.tasks.url.operators.location_id.subtasks.templates.subtask import LocationIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic class 
LocationBatchLinkSubtaskOperator(LocationIDSubtaskOperatorBase): diff --git a/src/core/tasks/url/operators/location_id/subtasks/impl/nlp_location_freq/processor/convert.py b/src/core/tasks/url/operators/location_id/subtasks/impl/nlp_location_freq/processor/convert.py index 8ec60b35..26b0ff32 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/impl/nlp_location_freq/processor/convert.py +++ b/src/core/tasks/url/operators/location_id/subtasks/impl/nlp_location_freq/processor/convert.py @@ -17,8 +17,8 @@ SearchSimilarLocationsResponse from src.core.tasks.url.operators.location_id.subtasks.models.subtask import AutoLocationIDSubtaskData from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic def convert_invalid_url_nlp_mappings_to_subtask_data_list( diff --git a/src/core/tasks/url/operators/location_id/subtasks/loader.py b/src/core/tasks/url/operators/location_id/subtasks/loader.py index 408b5a07..38ea8bed 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/loader.py +++ b/src/core/tasks/url/operators/location_id/subtasks/loader.py @@ -4,7 +4,7 @@ from src.core.tasks.url.operators.location_id.subtasks.impl.nlp_location_freq.processor.nlp.core import NLPProcessor from src.core.tasks.url.operators.location_id.subtasks.templates.subtask import LocationIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType 
class LocationIdentificationSubtaskLoader: diff --git a/src/core/tasks/url/operators/location_id/subtasks/models/subtask.py b/src/core/tasks/url/operators/location_id/subtasks/models/subtask.py index b06d2ff9..fa935ecb 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/models/subtask.py +++ b/src/core/tasks/url/operators/location_id/subtasks/models/subtask.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic class AutoLocationIDSubtaskData(BaseModel): diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/constants.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/constants.py index b9f85e2d..f3093b03 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/constants.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/constants.py @@ -1,5 +1,5 @@ # Determines priority of subtasks, all else being equal. 
-from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType SUBTASK_HIERARCHY: list[LocationIDSubtaskType] = [ LocationIDSubtaskType.NLP_LOCATION_FREQUENCY, diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/core.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/core.py index c267b89e..44cb0627 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/core.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/core.py @@ -6,7 +6,7 @@ from src.core.tasks.url.operators.location_id.subtasks.queries.survey.constants import SUBTASK_HIERARCHY_MAPPING from src.core.tasks.url.operators.location_id.subtasks.queries.survey.queries.eligible_counts import \ ELIGIBLE_COUNTS_QUERY -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType from src.db.queries.base.builder import QueryBuilderBase from src.db.helpers.session import session_helper as sh diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py index 7d0dddfd..668e9e69 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py @@ -2,24 +2,24 @@ from src.core.tasks.url.operators._shared.container.subtask.exists import \ URLsSubtaskExistsCTEContainer +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from 
src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion cte = ( select( URL.id ) .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.url_id == URL.id, + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSubtask.url_id == URL.id, ) .join( - LocationIDSubtaskSuggestion, - LocationIDSubtaskSuggestion.subtask_id == AutoLocationIDSubtask.id, + AnnotationLocationAutoSuggestion, + AnnotationLocationAutoSuggestion.subtask_id == AnnotationLocationAutoSubtask.id, ) .where( - LocationIDSubtaskSuggestion.confidence >= 95, + AnnotationLocationAutoSuggestion.confidence >= 95, ) .cte("high_confidence_annotations_exists") ) diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/helpers.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/helpers.py index acd73c4b..54f114b8 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/helpers.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/helpers.py @@ -1,8 +1,8 @@ from sqlalchemy import ColumnElement, exists +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask def get_exists_subtask_query( @@ -11,8 +11,8 @@ def get_exists_subtask_query( return ( exists() 
.where( - AutoLocationIDSubtask.url_id == URL.id, - AutoLocationIDSubtask.type == subtask_type, + AnnotationLocationAutoSubtask.url_id == URL.id, + AnnotationLocationAutoSubtask.type == subtask_type, ) .label("subtask_entry_exists") ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py index 14c2f260..6d08cc76 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py @@ -3,10 +3,10 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.location_id.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location_freq.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location_freq.py index 7ab2e0eb..72b4cd81 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location_freq.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location_freq.py @@ -3,9 +3,9 @@ from 
src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.location_id.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/eligible_counts.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/eligible_counts.py index b803b7f2..97c47a33 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/eligible_counts.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/eligible_counts.py @@ -1,7 +1,7 @@ from sqlalchemy import ColumnElement, func, Integer, select from src.core.tasks.url.operators.location_id.subtasks.queries.survey.queries.ctes.eligible import EligibleContainer -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType def sum_count(col: ColumnElement[bool], subtask_type: LocationIDSubtaskType) -> ColumnElement[int]: diff --git a/src/core/tasks/url/operators/location_id/subtasks/templates/subtask.py b/src/core/tasks/url/operators/location_id/subtasks/templates/subtask.py index 8ee856c2..a5fb050b 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/templates/subtask.py +++ b/src/core/tasks/url/operators/location_id/subtasks/templates/subtask.py @@ -7,8 +7,8 @@ from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion from src.db.client.async_ import AsyncDatabaseClient 
from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic -from src.db.models.impl.url.suggestion.location.auto.suggestion.pydantic import LocationIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.suggestion.pydantic import LocationIDSubtaskSuggestionPydantic from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall diff --git a/src/core/tasks/url/operators/record_type/core.py b/src/core/tasks/url/operators/record_type/core.py index 9f63a6a5..d6097ab0 100644 --- a/src/core/tasks/url/operators/record_type/core.py +++ b/src/core/tasks/url/operators/record_type/core.py @@ -7,7 +7,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.with_html import URLWithHTML from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall @@ -72,9 +72,9 @@ async def put_results_into_database( record_type = tdo.record_type url_and_record_type_list.append((url_id, record_type)) # Add to database - suggestions: list[AutoRecordTypeSuggestion] = [] + suggestions: list[AnnotationAutoRecordType] = [] for url_id, record_type in url_and_record_type_list: - suggestion = AutoRecordTypeSuggestion( + suggestion = AnnotationAutoRecordType( url_id=url_id, record_type=record_type.value ) diff --git a/src/core/tasks/url/operators/record_type/queries/cte.py b/src/core/tasks/url/operators/record_type/queries/cte.py index 22d3db10..710dab03 100644 --- a/src/core/tasks/url/operators/record_type/queries/cte.py +++ 
b/src/core/tasks/url/operators/record_type/queries/cte.py @@ -4,7 +4,7 @@ from src.db.helpers.query import not_exists_url, no_url_task_error from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType class RecordTypeTaskPrerequisiteCTEContainer: @@ -18,7 +18,7 @@ def __init__(self): URLCompressedHTML ) .where( - not_exists_url(AutoRecordTypeSuggestion), + not_exists_url(AnnotationAutoRecordType), no_url_task_error( TaskType.RECORD_TYPE ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py index 36fe0a87..440e908a 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py @@ -2,31 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - UserURLAgencySuggestion.url_id, - UserURLAgencySuggestion.agency_id.label("entity"), + AnnotationAgencyUser.url_id, + AnnotationAgencyUser.agency_id.label("entity"), func.count().label("votes") ) .group_by( - UserURLAgencySuggestion.url_id, - 
UserURLAgencySuggestion.agency_id + AnnotationAgencyUser.url_id, + AnnotationAgencyUser.agency_id ) ) _anon_counts = ( select( - AnonymousAnnotationAgency.url_id, - AnonymousAnnotationAgency.agency_id.label("entity"), + AnnotationAgencyAnon.url_id, + AnnotationAgencyAnon.agency_id.label("entity"), (func.count() / ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnonymousAnnotationAgency.url_id, - AnonymousAnnotationAgency.agency_id + AnnotationAgencyAnon.url_id, + AnnotationAgencyAnon.agency_id ) ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/location.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/location.py index 4e180e18..496b14e1 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/location.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/location.py @@ -2,32 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - UserLocationSuggestion.url_id, - UserLocationSuggestion.location_id.label("entity"), + AnnotationLocationUser.url_id, + AnnotationLocationUser.location_id.label("entity"), func.count().label("votes") ) .group_by( - UserLocationSuggestion.url_id, - UserLocationSuggestion.location_id + AnnotationLocationUser.url_id, + 
AnnotationLocationUser.location_id ) ) _anon_counts = ( select( - AnonymousAnnotationLocation.url_id, - AnonymousAnnotationLocation.location_id.label("entity"), + AnnotationLocationAnon.url_id, + AnnotationLocationAnon.location_id.label("entity"), (func.count() / ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnonymousAnnotationLocation.url_id, - AnonymousAnnotationLocation.location_id + AnnotationLocationAnon.url_id, + AnnotationLocationAnon.location_id ) ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py index 4000e6e2..cec89ef2 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py @@ -1,41 +1,41 @@ from sqlalchemy import select, func from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.link.anonymous_sessions__name_suggestion import LinkAnonymousSessionNameSuggestion -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - URLNameSuggestion.url_id, - URLNameSuggestion.suggestion.label("entity"), + AnnotationNameSuggestion.url_id, + AnnotationNameSuggestion.suggestion.label("entity"), func.count().label("votes") ) .join( LinkUserNameSuggestion, - LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id + LinkUserNameSuggestion.suggestion_id == AnnotationNameSuggestion.id ) .group_by( - 
URLNameSuggestion.url_id, - URLNameSuggestion.suggestion + AnnotationNameSuggestion.url_id, + AnnotationNameSuggestion.suggestion ) .cte("user_counts") ) _anon_counts = ( select( - URLNameSuggestion.url_id, - URLNameSuggestion.suggestion.label("entity"), + AnnotationNameSuggestion.url_id, + AnnotationNameSuggestion.suggestion.label("entity"), func.count().label("votes") ) .join( - LinkAnonymousSessionNameSuggestion, - LinkAnonymousSessionNameSuggestion.suggestion_id == URLNameSuggestion.id + AnnotationNameAnonEndorsement, + AnnotationNameAnonEndorsement.suggestion_id == AnnotationNameSuggestion.id ) .group_by( - URLNameSuggestion.url_id, - URLNameSuggestion.suggestion + AnnotationNameSuggestion.url_id, + AnnotationNameSuggestion.suggestion ) .cte("anon_counts") ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py index 65b1f9b0..efc92455 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py @@ -2,31 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - UserRecordTypeSuggestion.url_id, - UserRecordTypeSuggestion.record_type.label("entity"), + AnnotationUserRecordType.url_id, + 
AnnotationUserRecordType.record_type.label("entity"), func.count().label("votes") ) .group_by( - UserRecordTypeSuggestion.url_id, - UserRecordTypeSuggestion.record_type + AnnotationUserRecordType.url_id, + AnnotationUserRecordType.record_type ) ) _anon_counts = ( select( - AnonymousAnnotationRecordType.url_id, - AnonymousAnnotationRecordType.record_type.label("entity"), + AnnotationAnonRecordType.url_id, + AnnotationAnonRecordType.record_type.label("entity"), (func.count() * ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnonymousAnnotationRecordType.url_id, - AnonymousAnnotationRecordType.record_type + AnnotationAnonRecordType.url_id, + AnnotationAnonRecordType.record_type ) ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py index 72638f19..6c87e69b 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py @@ -2,31 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - UserURLTypeSuggestion.url_id, - UserURLTypeSuggestion.type.label("entity"), + AnnotationUserURLType.url_id, + AnnotationUserURLType.type.label("entity"), func.count().label("votes") ) .group_by( - UserURLTypeSuggestion.url_id, - UserURLTypeSuggestion.type + 
AnnotationUserURLType.url_id, + AnnotationUserURLType.type ) ) _anon_counts = ( select( - AnonymousAnnotationURLType.url_id, - AnonymousAnnotationURLType.url_type.label("entity"), + AnnotationAnonURLType.url_id, + AnnotationAnonURLType.url_type.label("entity"), (func.count() / ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnonymousAnnotationURLType.url_id, - AnonymousAnnotationURLType.url_type + AnnotationAnonURLType.url_id, + AnnotationAnonURLType.url_type ) ) diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 125c594e..c780f9d1 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -1,7 +1,6 @@ from datetime import datetime from functools import wraps from typing import Optional, Any, List -from uuid import UUID, uuid4 from sqlalchemy import select, func, Select, and_, update, Row, text from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker, AsyncEngine @@ -50,13 +49,14 @@ from src.db.client.types import UserSuggestionModel from src.db.config_manager import ConfigManager from src.db.constants import PLACEHOLDER_AGENCY_NAME -from src.db.dtos.url.html_content import URLHTMLContentInfo from src.db.dtos.url.insert import InsertURLsInfo from src.db.dtos.url.raw_html import RawHTMLInfo from src.db.enums import TaskType from src.db.helpers.session import session_helper as sh from src.db.models.impl.agency.enums import AgencyType, JurisdictionType from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput from src.db.models.impl.backlog_snapshot import BacklogSnapshot from src.db.models.impl.batch.pydantic.info import BatchInfo from src.db.models.impl.batch.sqlalchemy import Batch @@ -75,15 +75,11 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.data_source.sqlalchemy import 
DSAppLinkDataSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.anonymous import AnonymousSession -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.models.templates_.base import Base @@ -231,7 +227,7 @@ async def add_user_relevant_suggestions( inputs: list[AutoRelevancyAnnotationInput] ): models = [ - AutoRelevantSuggestion( + AnnotationAutoURLType( url_id=input_.url_id, relevant=input_.is_relevant, confidence=input_.confidence, @@ -267,7 +263,7 @@ async def add_user_relevant_suggestion( ): prior_suggestion = await self.get_user_suggestion( session, - model=UserURLTypeSuggestion, + model=AnnotationUserURLType, user_id=user_id, url_id=url_id ) @@ -275,7 +271,7 @@ async def add_user_relevant_suggestion( prior_suggestion.type = 
suggested_status.value return - suggestion = UserURLTypeSuggestion( + suggestion = AnnotationUserURLType( url_id=url_id, user_id=user_id, type=suggested_status.value @@ -292,7 +288,7 @@ async def add_auto_record_type_suggestion( url_id: int, record_type: RecordType ): - suggestion = AutoRecordTypeSuggestion( + suggestion = AnnotationAutoRecordType( url_id=url_id, record_type=record_type.value ) @@ -308,7 +304,7 @@ async def add_user_record_type_suggestion( ): prior_suggestion = await self.get_user_suggestion( session, - model=UserRecordTypeSuggestion, + model=AnnotationUserRecordType, user_id=user_id, url_id=url_id ) @@ -316,7 +312,7 @@ async def add_user_record_type_suggestion( prior_suggestion.record_type = record_type.value return - suggestion = UserRecordTypeSuggestion( + suggestion = AnnotationUserRecordType( url_id=url_id, user_id=user_id, record_type=record_type.value @@ -570,7 +566,7 @@ async def add_agency_manual_suggestion( ) await session.merge(agency) - url_agency_suggestion = UserURLAgencySuggestion( + url_agency_suggestion = AnnotationAgencyUser( url_id=url_id, agency_id=agency_id, user_id=user_id, diff --git a/src/db/client/types.py b/src/db/client/types.py index e4f70301..8e3bff0d 100644 --- a/src/db/client/types.py +++ b/src/db/client/types.py @@ -1,5 +1,5 @@ -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType -UserSuggestionModel = UserURLTypeSuggestion or UserRecordTypeSuggestion or UserURLAgencySuggestion +UserSuggestionModel = AnnotationUserURLType or AnnotationUserRecordType or 
AnnotationAgencyUser diff --git a/src/db/constants.py b/src/db/constants.py index c8821e7e..87fd1f19 100644 --- a/src/db/constants.py +++ b/src/db/constants.py @@ -1,13 +1,13 @@ -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType PLACEHOLDER_AGENCY_NAME = "PLACEHOLDER_AGENCY_NAME" STANDARD_ROW_LIMIT = 100 USER_ANNOTATION_MODELS = [ - UserURLTypeSuggestion, - UserRecordTypeSuggestion, - UserURLAgencySuggestion + AnnotationUserURLType, + AnnotationUserRecordType, + AnnotationAgencyUser ] \ No newline at end of file diff --git a/src/db/dto_converter.py b/src/db/dto_converter.py index 4c91a353..4eb5a4cd 100644 --- a/src/db/dto_converter.py +++ b/src/db/dto_converter.py @@ -1,23 +1,19 @@ from collections import Counter -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo -from src.api.endpoints.review.next.dto import FinalReviewAnnotationRelevantInfo, FinalReviewAnnotationRecordTypeInfo, \ - FinalReviewAnnotationAgencyInfo -from src.core.enums import RecordType, SuggestionType +from src.api.endpoints.review.next.dto import FinalReviewAnnotationRelevantInfo, FinalReviewAnnotationRecordTypeInfo +from src.core.enums import RecordType from src.core.tasks.url.operators.html.scraper.parser.dtos.response_html import ResponseHTMLInfo from src.core.tasks.url.operators.html.scraper.parser.mapping import ENUM_TO_ATTRIBUTE_MAPPING from src.db.dtos.url.html_content import URLHTMLContentInfo from 
src.db.dtos.url.with_html import URLWithHTML -from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType class DTOConverter: @@ -28,8 +24,8 @@ class DTOConverter: @staticmethod def final_review_annotation_relevant_info( - user_suggestions: list[UserURLTypeSuggestion], - auto_suggestion: AutoRelevantSuggestion + user_suggestions: list[AnnotationUserURLType], + auto_suggestion: AnnotationAutoURLType ) -> FinalReviewAnnotationRelevantInfo: auto_value = RelevanceAnnotationResponseInfo( @@ -48,8 +44,8 @@ def final_review_annotation_relevant_info( @staticmethod def final_review_annotation_record_type_info( - user_suggestions: list[UserRecordTypeSuggestion], - auto_suggestion: AutoRecordTypeSuggestion + user_suggestions: list[AnnotationUserRecordType], + auto_suggestion: AnnotationAutoRecordType ): if auto_suggestion is None: diff --git a/src/db/models/impl/agency/sqlalchemy.py b/src/db/models/impl/agency/sqlalchemy.py index 9e99a0be..a6c9c1cf 100644 --- 
a/src/db/models/impl/agency/sqlalchemy.py +++ b/src/db/models/impl/agency/sqlalchemy.py @@ -27,8 +27,8 @@ class Agency( ) # Relationships - automated_suggestions = relationship("AgencyIDSubtaskSuggestion") - user_suggestions = relationship("UserURLAgencySuggestion", back_populates="agency") + automated_suggestions = relationship("AnnotationAgencyAutoSuggestion") + user_suggestions = relationship("AnnotationAgencyUser", back_populates="agency") confirmed_urls = relationship("LinkURLAgency", back_populates="agency") locations = relationship( diff --git a/src/db/models/impl/url/suggestion/README.md b/src/db/models/impl/annotation/README.md similarity index 100% rename from src/db/models/impl/url/suggestion/README.md rename to src/db/models/impl/annotation/README.md diff --git a/src/db/models/impl/link/user_name_suggestion/__init__.py b/src/db/models/impl/annotation/__init__.py similarity index 100% rename from src/db/models/impl/link/user_name_suggestion/__init__.py rename to src/db/models/impl/annotation/__init__.py diff --git a/src/db/models/impl/url/suggestion/__init__.py b/src/db/models/impl/annotation/agency/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/__init__.py rename to src/db/models/impl/annotation/agency/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/__init__.py b/src/db/models/impl/annotation/agency/anon/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/agency/__init__.py rename to src/db/models/impl/annotation/agency/anon/__init__.py diff --git a/src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py b/src/db/models/impl/annotation/agency/anon/sqlalchemy.py similarity index 83% rename from src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py rename to src/db/models/impl/annotation/agency/anon/sqlalchemy.py index a99c92e8..a2da332b 100644 --- a/src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py +++ 
b/src/db/models/impl/annotation/agency/anon/sqlalchemy.py @@ -4,14 +4,14 @@ from src.db.models.templates_.base import Base -class AnonymousAnnotationAgency( +class AnnotationAgencyAnon( Base, URLDependentMixin, AgencyDependentMixin, CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "annotation__anon__agency" + __tablename__ = "annotation__agency__anon" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "agency_id"), ) \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/agency/subtask/__init__.py b/src/db/models/impl/annotation/agency/auto/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/agency/subtask/__init__.py rename to src/db/models/impl/annotation/agency/auto/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/__init__.py b/src/db/models/impl/annotation/agency/auto/subtask/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/agency/suggestion/__init__.py rename to src/db/models/impl/annotation/agency/auto/subtask/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/subtask/enum.py b/src/db/models/impl/annotation/agency/auto/subtask/enum.py similarity index 100% rename from src/db/models/impl/url/suggestion/agency/subtask/enum.py rename to src/db/models/impl/annotation/agency/auto/subtask/enum.py diff --git a/src/db/models/impl/url/suggestion/agency/subtask/pydantic.py b/src/db/models/impl/annotation/agency/auto/subtask/pydantic.py similarity index 61% rename from src/db/models/impl/url/suggestion/agency/subtask/pydantic.py rename to src/db/models/impl/annotation/agency/auto/subtask/pydantic.py index f2e9be57..4faee30d 100644 --- a/src/db/models/impl/url/suggestion/agency/subtask/pydantic.py +++ b/src/db/models/impl/annotation/agency/auto/subtask/pydantic.py @@ -1,5 +1,5 @@ -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode -from 
src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -14,4 +14,4 @@ class URLAutoAgencyIDSubtaskPydantic(BulkInsertableModel): @classmethod def sa_model(cls) -> type_alias[Base]: - return URLAutoAgencyIDSubtask \ No newline at end of file + return AnnotationAgencyAutoSubtask \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py b/src/db/models/impl/annotation/agency/auto/subtask/sqlalchemy.py similarity index 76% rename from src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py rename to src/db/models/impl/annotation/agency/auto/subtask/sqlalchemy.py index 9fa3e5f5..56383a6a 100644 --- a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py +++ b/src/db/models/impl/annotation/agency/auto/subtask/sqlalchemy.py @@ -1,20 +1,20 @@ from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import enum_column -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, TaskDependentMixin from src.db.models.templates_.with_id import WithIDBase import sqlalchemy as sa -class URLAutoAgencyIDSubtask( +class AnnotationAgencyAutoSubtask( WithIDBase, URLDependentMixin, TaskDependentMixin, CreatedAtMixin ): - __tablename__ = "annotation__auto__agency__subtasks" + __tablename__ = "annotation__agency__auto__subtasks" type: Mapped[AutoAgencyIDSubtaskType] = enum_column( AutoAgencyIDSubtaskType, @@ -30,6 +30,6 @@ 
class URLAutoAgencyIDSubtask( ) suggestions = relationship( - "AgencyIDSubtaskSuggestion", + "AnnotationAgencyAutoSuggestion", cascade="all, delete-orphan" ) \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/anonymous/agency/__init__.py b/src/db/models/impl/annotation/agency/auto/suggestion/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/agency/__init__.py rename to src/db/models/impl/annotation/agency/auto/suggestion/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py b/src/db/models/impl/annotation/agency/auto/suggestion/pydantic.py similarity index 69% rename from src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py rename to src/db/models/impl/annotation/agency/auto/suggestion/pydantic.py index 5a0fd2b8..1ec38502 100644 --- a/src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py +++ b/src/db/models/impl/annotation/agency/auto/suggestion/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -13,4 +13,4 @@ class AgencyIDSubtaskSuggestionPydantic( @classmethod def sa_model(cls) -> type[Base]: """Defines the SQLAlchemy model.""" - return AgencyIDSubtaskSuggestion \ No newline at end of file + return AnnotationAgencyAutoSuggestion \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py b/src/db/models/impl/annotation/agency/auto/suggestion/sqlalchemy.py similarity index 77% rename from src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py rename to src/db/models/impl/annotation/agency/auto/suggestion/sqlalchemy.py index ff3748c6..5cb715a5 100644 --- 
a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py +++ b/src/db/models/impl/annotation/agency/auto/suggestion/sqlalchemy.py @@ -5,17 +5,17 @@ from src.db.models.templates_.with_id import WithIDBase -class AgencyIDSubtaskSuggestion( +class AnnotationAgencyAutoSuggestion( WithIDBase, CreatedAtMixin, AgencyDependentMixin, ): - __tablename__ = "annotation__auto__agency__suggestions" + __tablename__ = "annotation__agency__auto__suggestions" subtask_id = sa.Column( sa.Integer, - sa.ForeignKey("annotation__auto__agency__subtasks.id"), + sa.ForeignKey("annotation__agency__auto__subtasks.id"), nullable=False ) confidence = sa.Column( diff --git a/src/db/models/impl/url/suggestion/anonymous/location/__init__.py b/src/db/models/impl/annotation/agency/user/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/location/__init__.py rename to src/db/models/impl/annotation/agency/user/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/user.py b/src/db/models/impl/annotation/agency/user/sqlalchemy.py similarity index 85% rename from src/db/models/impl/url/suggestion/agency/user.py rename to src/db/models/impl/annotation/agency/user/sqlalchemy.py index c6154b16..6b00e06c 100644 --- a/src/db/models/impl/url/suggestion/agency/user.py +++ b/src/db/models/impl/annotation/agency/user/sqlalchemy.py @@ -6,8 +6,8 @@ from src.db.models.templates_.base import Base -class UserURLAgencySuggestion(URLDependentMixin, Base): - __tablename__ = "annotation__user__agency" +class AnnotationAgencyUser(URLDependentMixin, Base): + __tablename__ = "annotation__agency__user" __table_args__ = ( PrimaryKeyConstraint("agency_id", "url_id", "user_id"), ) diff --git a/src/db/models/impl/url/suggestion/anonymous/record_type/__init__.py b/src/db/models/impl/annotation/location/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/record_type/__init__.py rename to 
src/db/models/impl/annotation/location/__init__.py diff --git a/src/db/models/impl/url/suggestion/anonymous/session/__init__.py b/src/db/models/impl/annotation/location/anon/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/session/__init__.py rename to src/db/models/impl/annotation/location/anon/__init__.py diff --git a/src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py b/src/db/models/impl/annotation/location/anon/sqlalchemy.py similarity index 83% rename from src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py rename to src/db/models/impl/annotation/location/anon/sqlalchemy.py index c44d76bd..6855b021 100644 --- a/src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py +++ b/src/db/models/impl/annotation/location/anon/sqlalchemy.py @@ -4,7 +4,7 @@ from src.db.models.templates_.base import Base -class AnonymousAnnotationLocation( +class AnnotationLocationAnon( Base, URLDependentMixin, LocationDependentMixin, @@ -12,7 +12,7 @@ class AnonymousAnnotationLocation( AnonymousSessionMixin ): - __tablename__ = "annotation__anon__location" + __tablename__ = "annotation__location__anon" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "location_id"), ) \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/anonymous/url_type/__init__.py b/src/db/models/impl/annotation/location/auto/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/url_type/__init__.py rename to src/db/models/impl/annotation/location/auto/__init__.py diff --git a/src/db/models/impl/url/suggestion/location/__init__.py b/src/db/models/impl/annotation/location/auto/subtask/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/__init__.py rename to src/db/models/impl/annotation/location/auto/subtask/__init__.py diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/constants.py 
b/src/db/models/impl/annotation/location/auto/subtask/constants.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/subtask/constants.py rename to src/db/models/impl/annotation/location/auto/subtask/constants.py diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/enums.py b/src/db/models/impl/annotation/location/auto/subtask/enums.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/subtask/enums.py rename to src/db/models/impl/annotation/location/auto/subtask/enums.py diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/pydantic.py b/src/db/models/impl/annotation/location/auto/subtask/pydantic.py similarity index 60% rename from src/db/models/impl/url/suggestion/location/auto/subtask/pydantic.py rename to src/db/models/impl/annotation/location/auto/subtask/pydantic.py index 091a00b9..8bf8c1ed 100644 --- a/src/db/models/impl/url/suggestion/location/auto/subtask/pydantic.py +++ b/src/db/models/impl/annotation/location/auto/subtask/pydantic.py @@ -1,5 +1,5 @@ -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -16,4 +16,4 @@ class AutoLocationIDSubtaskPydantic( @classmethod def sa_model(cls) -> type[Base]: """Defines the SQLAlchemy model.""" - return AutoLocationIDSubtask \ No newline at end of file + return AnnotationLocationAutoSubtask \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py b/src/db/models/impl/annotation/location/auto/subtask/sqlalchemy.py 
similarity index 63% rename from src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py rename to src/db/models/impl/annotation/location/auto/subtask/sqlalchemy.py index 7d4e67bf..61654851 100644 --- a/src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py +++ b/src/db/models/impl/annotation/location/auto/subtask/sqlalchemy.py @@ -2,20 +2,20 @@ from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import enum_column -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.mixins import CreatedAtMixin, TaskDependentMixin, URLDependentMixin from src.db.models.templates_.with_id import WithIDBase -class AutoLocationIDSubtask( +class AnnotationLocationAutoSubtask( WithIDBase, CreatedAtMixin, TaskDependentMixin, URLDependentMixin, ): - __tablename__ = 'annotation__auto__location__subtasks' + __tablename__ = 'annotation__location__auto__subtasks' locations_found = Column(Boolean(), nullable=False) type: Mapped[LocationIDSubtaskType] = enum_column( @@ -24,5 +24,5 @@ class AutoLocationIDSubtask( ) suggestions = relationship( - LocationIDSubtaskSuggestion + AnnotationLocationAutoSuggestion ) \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/location/auto/__init__.py b/src/db/models/impl/annotation/location/auto/suggestion/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/__init__.py rename to src/db/models/impl/annotation/location/auto/suggestion/__init__.py diff --git a/src/db/models/impl/url/suggestion/location/auto/suggestion/pydantic.py 
b/src/db/models/impl/annotation/location/auto/suggestion/pydantic.py similarity index 68% rename from src/db/models/impl/url/suggestion/location/auto/suggestion/pydantic.py rename to src/db/models/impl/annotation/location/auto/suggestion/pydantic.py index 1ddc53d7..792e3bd4 100644 --- a/src/db/models/impl/url/suggestion/location/auto/suggestion/pydantic.py +++ b/src/db/models/impl/annotation/location/auto/suggestion/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -12,4 +12,4 @@ class LocationIDSubtaskSuggestionPydantic(BulkInsertableModel): @classmethod def sa_model(cls) -> type[Base]: """Defines the SQLAlchemy model.""" - return LocationIDSubtaskSuggestion \ No newline at end of file + return AnnotationLocationAutoSuggestion \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py b/src/db/models/impl/annotation/location/auto/suggestion/sqlalchemy.py similarity index 78% rename from src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py rename to src/db/models/impl/annotation/location/auto/suggestion/sqlalchemy.py index 650ee9a7..f76d9eef 100644 --- a/src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py +++ b/src/db/models/impl/annotation/location/auto/suggestion/sqlalchemy.py @@ -5,11 +5,11 @@ from src.db.models.templates_.base import Base -class LocationIDSubtaskSuggestion( +class AnnotationLocationAutoSuggestion( Base, ): - __tablename__ = 'annotation__auto__location__suggestions' + __tablename__ = 'annotation__location__auto__suggestions' __table_args__ = ( PrimaryKeyConstraint( 'subtask_id', @@ -19,7 +19,7 @@ class LocationIDSubtaskSuggestion( ) 
subtask_id = Column( Integer, - ForeignKey('annotation__auto__location__subtasks.id'), + ForeignKey('annotation__location__auto__subtasks.id'), nullable=False, primary_key=True, ) diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/__init__.py b/src/db/models/impl/annotation/location/user/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/subtask/__init__.py rename to src/db/models/impl/annotation/location/user/__init__.py diff --git a/src/db/models/impl/url/suggestion/location/user/pydantic.py b/src/db/models/impl/annotation/location/user/pydantic.py similarity index 70% rename from src/db/models/impl/url/suggestion/location/user/pydantic.py rename to src/db/models/impl/annotation/location/user/pydantic.py index 11f2218b..c3bdcf11 100644 --- a/src/db/models/impl/url/suggestion/location/user/pydantic.py +++ b/src/db/models/impl/annotation/location/user/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -13,4 +13,4 @@ class UserLocationSuggestionPydantic( @classmethod def sa_model(cls) -> type[Base]: """Defines the SQLAlchemy model.""" - return UserLocationSuggestion + return AnnotationLocationUser diff --git a/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py b/src/db/models/impl/annotation/location/user/sqlalchemy.py similarity index 88% rename from src/db/models/impl/url/suggestion/location/user/sqlalchemy.py rename to src/db/models/impl/annotation/location/user/sqlalchemy.py index 76883cfb..614912fd 100644 --- a/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py +++ b/src/db/models/impl/annotation/location/user/sqlalchemy.py @@ -6,13 +6,13 @@ from src.db.models.templates_.base import Base -class 
UserLocationSuggestion( +class AnnotationLocationUser( Base, CreatedAtMixin, LocationDependentMixin, URLDependentMixin ): - __tablename__ = 'annotation__user__location' + __tablename__ = 'annotation__location__user' __table_args__ = ( PrimaryKeyConstraint('url_id', 'location_id', 'user_id'), ) diff --git a/src/db/models/impl/url/suggestion/location/auto/suggestion/__init__.py b/src/db/models/impl/annotation/name/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/suggestion/__init__.py rename to src/db/models/impl/annotation/name/__init__.py diff --git a/src/db/models/impl/url/suggestion/location/user/__init__.py b/src/db/models/impl/annotation/name/anon/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/user/__init__.py rename to src/db/models/impl/annotation/name/anon/__init__.py diff --git a/src/db/models/impl/link/anonymous_sessions__name_suggestion.py b/src/db/models/impl/annotation/name/anon/sqlalchemy.py similarity index 75% rename from src/db/models/impl/link/anonymous_sessions__name_suggestion.py rename to src/db/models/impl/annotation/name/anon/sqlalchemy.py index a5773bd7..8e24a515 100644 --- a/src/db/models/impl/link/anonymous_sessions__name_suggestion.py +++ b/src/db/models/impl/annotation/name/anon/sqlalchemy.py @@ -4,15 +4,15 @@ from src.db.models.templates_.base import Base -class LinkAnonymousSessionNameSuggestion( +class AnnotationNameAnonEndorsement( Base, AnonymousSessionMixin, CreatedAtMixin ): - __tablename__ = "link__anonymous_sessions__name_suggestions" + __tablename__ = "annotation__name__anon__endorsements" suggestion_id = Column( Integer, - ForeignKey("url_name_suggestions.id"), + ForeignKey("annotation__name__suggestions.id"), primary_key=True, nullable=False, ) diff --git a/src/db/models/impl/url/suggestion/name/__init__.py b/src/db/models/impl/annotation/name/suggestion/__init__.py similarity index 100% rename from 
src/db/models/impl/url/suggestion/name/__init__.py rename to src/db/models/impl/annotation/name/suggestion/__init__.py diff --git a/src/db/models/impl/url/suggestion/name/enums.py b/src/db/models/impl/annotation/name/suggestion/enums.py similarity index 100% rename from src/db/models/impl/url/suggestion/name/enums.py rename to src/db/models/impl/annotation/name/suggestion/enums.py diff --git a/src/db/models/impl/annotation/name/suggestion/pydantic.py b/src/db/models/impl/annotation/name/suggestion/pydantic.py new file mode 100644 index 00000000..55423a0a --- /dev/null +++ b/src/db/models/impl/annotation/name/suggestion/pydantic.py @@ -0,0 +1,17 @@ +from pydantic import Field + +from src.db.models.impl.annotation.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.templates.markers.bulk.insert import BulkInsertableModel + + +class URLNameSuggestionPydantic(BulkInsertableModel): + + url_id: int + suggestion: str = Field(..., max_length=MAX_SUGGESTION_LENGTH) + source: NameSuggestionSource + + @classmethod + def sa_model(cls) -> type[AnnotationNameSuggestion]: + return AnnotationNameSuggestion \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/name/sqlalchemy.py b/src/db/models/impl/annotation/name/suggestion/sqlalchemy.py similarity index 65% rename from src/db/models/impl/url/suggestion/name/sqlalchemy.py rename to src/db/models/impl/annotation/name/suggestion/sqlalchemy.py index 2f11542d..5aeee478 100644 --- a/src/db/models/impl/url/suggestion/name/sqlalchemy.py +++ b/src/db/models/impl/annotation/name/suggestion/sqlalchemy.py @@ -2,19 +2,19 @@ from sqlalchemy.orm import Mapped from src.db.models.helpers import enum_column -from src.db.models.impl.url.suggestion.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH -from 
src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource +from src.db.models.impl.annotation.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.mixins import URLDependentMixin, CreatedAtMixin from src.db.models.templates_.with_id import WithIDBase -class URLNameSuggestion( +class AnnotationNameSuggestion( WithIDBase, CreatedAtMixin, URLDependentMixin ): - __tablename__ = "url_name_suggestions" + __tablename__ = "annotation__name__suggestions" suggestion = Column(String(MAX_SUGGESTION_LENGTH), nullable=False) source: Mapped[NameSuggestionSource] = enum_column( diff --git a/src/db/models/impl/url/suggestion/record_type/__init__.py b/src/db/models/impl/annotation/name/user/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/record_type/__init__.py rename to src/db/models/impl/annotation/name/user/__init__.py diff --git a/src/db/models/impl/link/user_name_suggestion/pydantic.py b/src/db/models/impl/annotation/name/user/pydantic.py similarity index 75% rename from src/db/models/impl/link/user_name_suggestion/pydantic.py rename to src/db/models/impl/annotation/name/user/pydantic.py index 6e07989b..95fe0150 100644 --- a/src/db/models/impl/link/user_name_suggestion/pydantic.py +++ b/src/db/models/impl/annotation/name/user/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion from src.db.templates.markers.bulk.insert import BulkInsertableModel diff --git a/src/db/models/impl/link/user_name_suggestion/sqlalchemy.py b/src/db/models/impl/annotation/name/user/sqlalchemy.py similarity index 78% rename from src/db/models/impl/link/user_name_suggestion/sqlalchemy.py rename to src/db/models/impl/annotation/name/user/sqlalchemy.py index 316a8e3c..cf23dd6f 100644 --- 
a/src/db/models/impl/link/user_name_suggestion/sqlalchemy.py +++ b/src/db/models/impl/annotation/name/user/sqlalchemy.py @@ -9,11 +9,11 @@ class LinkUserNameSuggestion( CreatedAtMixin, ): - __tablename__ = "link_user_name_suggestions" + __tablename__ = "annotation__name__user__endorsements" suggestion_id = Column( Integer, - ForeignKey("url_name_suggestions.id"), + ForeignKey("annotation__name__suggestions.id"), primary_key=True, nullable=False, ) diff --git a/src/db/models/impl/url/suggestion/url_type/__init__.py b/src/db/models/impl/annotation/record_type/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/url_type/__init__.py rename to src/db/models/impl/annotation/record_type/__init__.py diff --git a/src/db/models/impl/url/suggestion/url_type/auto/__init__.py b/src/db/models/impl/annotation/record_type/anon/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/url_type/auto/__init__.py rename to src/db/models/impl/annotation/record_type/anon/__init__.py diff --git a/src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py b/src/db/models/impl/annotation/record_type/anon/sqlalchemy.py similarity index 87% rename from src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py rename to src/db/models/impl/annotation/record_type/anon/sqlalchemy.py index 67432bce..304ab1be 100644 --- a/src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py +++ b/src/db/models/impl/annotation/record_type/anon/sqlalchemy.py @@ -7,13 +7,13 @@ from src.db.models.templates_.base import Base -class AnonymousAnnotationRecordType( +class AnnotationAnonRecordType( Base, URLDependentMixin, CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "annotation__anon__record_type" + __tablename__ = "annotation__record_type__anon" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "record_type"), ) diff --git a/src/db/models/impl/url/suggestion/url_type/auto/pydantic/__init__.py 
b/src/db/models/impl/annotation/record_type/auto/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/url_type/auto/pydantic/__init__.py rename to src/db/models/impl/annotation/record_type/auto/__init__.py diff --git a/src/db/models/impl/url/suggestion/record_type/auto.py b/src/db/models/impl/annotation/record_type/auto/sqlalchemy.py similarity index 89% rename from src/db/models/impl/url/suggestion/record_type/auto.py rename to src/db/models/impl/annotation/record_type/auto/sqlalchemy.py index 39f345af..b09f01d8 100644 --- a/src/db/models/impl/url/suggestion/record_type/auto.py +++ b/src/db/models/impl/annotation/record_type/auto/sqlalchemy.py @@ -8,13 +8,13 @@ from src.db.models.types import record_type_values -class AutoRecordTypeSuggestion( +class AnnotationAutoRecordType( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, Base, ): - __tablename__ = "annotation__auto__record_type" + __tablename__ = "annotation__record_type__auto" record_type = Column(postgresql.ENUM(*record_type_values, name='record_type'), nullable=False) __table_args__ = ( diff --git a/src/db/models/impl/annotation/record_type/user/__init__.py b/src/db/models/impl/annotation/record_type/user/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/record_type/user.py b/src/db/models/impl/annotation/record_type/user/user.py similarity index 90% rename from src/db/models/impl/url/suggestion/record_type/user.py rename to src/db/models/impl/annotation/record_type/user/user.py index f238fca7..689d985b 100644 --- a/src/db/models/impl/url/suggestion/record_type/user.py +++ b/src/db/models/impl/annotation/record_type/user/user.py @@ -8,13 +8,13 @@ from src.db.models.types import record_type_values -class UserRecordTypeSuggestion( +class AnnotationUserRecordType( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, Base, ): - __tablename__ = "annotation__user__record_type" + __tablename__ = "annotation__record_type__user" 
__table_args__ = ( PrimaryKeyConstraint("url_id", "user_id"), ) diff --git a/src/db/models/impl/annotation/url_type/__init__.py b/src/db/models/impl/annotation/url_type/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/url_type/anon/__init__.py b/src/db/models/impl/annotation/url_type/anon/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py b/src/db/models/impl/annotation/url_type/anon/sqlalchemy.py similarity index 87% rename from src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py rename to src/db/models/impl/annotation/url_type/anon/sqlalchemy.py index 87efb760..a1de1826 100644 --- a/src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py +++ b/src/db/models/impl/annotation/url_type/anon/sqlalchemy.py @@ -7,13 +7,13 @@ from src.db.models.templates_.base import Base -class AnonymousAnnotationURLType( +class AnnotationAnonURLType( Base, URLDependentMixin, CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "annotation__anon__url_type" + __tablename__ = "annotation__url_type__anon" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "url_type"), ) diff --git a/src/db/models/impl/annotation/url_type/auto/__init__.py b/src/db/models/impl/annotation/url_type/auto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/url_type/auto/pydantic/__init__.py b/src/db/models/impl/annotation/url_type/auto/pydantic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/url_type/auto/pydantic/input.py b/src/db/models/impl/annotation/url_type/auto/pydantic/input.py similarity index 100% rename from src/db/models/impl/url/suggestion/url_type/auto/pydantic/input.py rename to src/db/models/impl/annotation/url_type/auto/pydantic/input.py diff --git a/src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py 
b/src/db/models/impl/annotation/url_type/auto/sqlalchemy.py similarity index 90% rename from src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py rename to src/db/models/impl/annotation/url_type/auto/sqlalchemy.py index 7944ba5e..cc5fb7b8 100644 --- a/src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py +++ b/src/db/models/impl/annotation/url_type/auto/sqlalchemy.py @@ -6,13 +6,13 @@ from src.db.models.templates_.with_id import WithIDBase -class AutoRelevantSuggestion( +class AnnotationAutoURLType( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, Base, ): - __tablename__ = "annotation__auto__url_type" + __tablename__ = "annotation__url_type__auto" relevant = Column(Boolean, nullable=True) confidence = Column(Float, nullable=True) diff --git a/src/db/models/impl/annotation/url_type/user/__init__.py b/src/db/models/impl/annotation/url_type/user/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/url_type/user.py b/src/db/models/impl/annotation/url_type/user/sqlalchemy.py similarity index 91% rename from src/db/models/impl/url/suggestion/url_type/user.py rename to src/db/models/impl/annotation/url_type/user/sqlalchemy.py index 896a6054..af84a758 100644 --- a/src/db/models/impl/url/suggestion/url_type/user.py +++ b/src/db/models/impl/annotation/url_type/user/sqlalchemy.py @@ -9,13 +9,13 @@ from src.db.models.templates_.with_id import WithIDBase -class UserURLTypeSuggestion( +class AnnotationUserURLType( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, Base, ): - __tablename__ = "annotation__user__url_type" + __tablename__ = "annotation__url_type__user" __table_args__ = ( PrimaryKeyConstraint("url_id", "user_id"), ) diff --git a/src/db/models/impl/anon_session/__init__.py b/src/db/models/impl/anon_session/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/anonymous/session/sqlalchemy.py b/src/db/models/impl/anon_session/sqlalchemy.py 
similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/session/sqlalchemy.py rename to src/db/models/impl/anon_session/sqlalchemy.py diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index de4af177..dd52c1e1 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -1,18 +1,23 @@ from sqlalchemy import Column, Text, String, JSON, case, literal, Boolean from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.orm import relationship, Mapped -from sqlalchemy.util import hybridproperty from src.collectors.enums import URLStatus from src.db.models.helpers import enum_column +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from 
src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.mixins import UpdatedAtMixin, CreatedAtMixin from src.db.models.templates_.with_id import WithIDBase @@ -90,35 +95,35 @@ def full_url(cls): name_suggestions = relationship( - URLNameSuggestion + AnnotationNameSuggestion ) # Location user_location_suggestions = relationship( - UserLocationSuggestion + AnnotationLocationUser ) user_location_suggestion_not_found = relationship( LinkUserSuggestionLocationNotFound ) auto_location_subtasks = relationship( - AutoLocationIDSubtask + AnnotationLocationAutoSubtask ) # Agency user_agency_suggestions = relationship( - "UserURLAgencySuggestion", back_populates="url") + AnnotationAgencyUser, back_populates="url") auto_agency_subtasks = relationship( - "URLAutoAgencyIDSubtask" + AnnotationAgencyAutoSubtask ) # Record Type auto_record_type_suggestion = relationship( - "AutoRecordTypeSuggestion", uselist=False, back_populates="url") + AnnotationAutoRecordType, uselist=False, back_populates="url") user_record_type_suggestions = relationship( - "UserRecordTypeSuggestion", back_populates="url") + AnnotationUserRecordType, back_populates="url") # Relvant/URL Type auto_relevant_suggestion = relationship( - "AutoRelevantSuggestion", uselist=False, back_populates="url") + AnnotationAutoURLType, uselist=False, back_populates="url") user_relevant_suggestions = relationship( - "UserURLTypeSuggestion", back_populates="url") + AnnotationUserURLType, back_populates="url") reviewing_user = relationship( "ReviewingUserURL", uselist=False, back_populates="url") diff --git a/src/db/models/impl/url/suggestion/anonymous/__init__.py b/src/db/models/impl/url/suggestion/anonymous/__init__.py deleted file 
mode 100644 index fddc715f..00000000 --- a/src/db/models/impl/url/suggestion/anonymous/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from src.db.models.impl.url.suggestion.anonymous.session.sqlalchemy import AnonymousSession \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/name/pydantic.py b/src/db/models/impl/url/suggestion/name/pydantic.py deleted file mode 100644 index 244e02c2..00000000 --- a/src/db/models/impl/url/suggestion/name/pydantic.py +++ /dev/null @@ -1,17 +0,0 @@ -from pydantic import Field - -from src.db.models.impl.url.suggestion.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.templates.markers.bulk.insert import BulkInsertableModel - - -class URLNameSuggestionPydantic(BulkInsertableModel): - - url_id: int - suggestion: str = Field(..., max_length=MAX_SUGGESTION_LENGTH) - source: NameSuggestionSource - - @classmethod - def sa_model(cls) -> type[URLNameSuggestion]: - return URLNameSuggestion \ No newline at end of file diff --git a/src/db/queries/implementations/anonymous_session.py b/src/db/queries/implementations/anonymous_session.py index 0ff00ea3..a2fbf346 100644 --- a/src/db/queries/implementations/anonymous_session.py +++ b/src/db/queries/implementations/anonymous_session.py @@ -2,7 +2,7 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.db.models.impl.url.suggestion.anonymous import AnonymousSession +from src.db.models.impl.anon_session.sqlalchemy import AnonymousSession from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/db/queries/implementations/core/common/annotation_exists_/constants.py b/src/db/queries/implementations/core/common/annotation_exists_/constants.py index 190291ef..5851b42b 100644 --- a/src/db/queries/implementations/core/common/annotation_exists_/constants.py +++ 
b/src/db/queries/implementations/core/common/annotation_exists_/constants.py @@ -1,15 +1,15 @@ -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType ALL_ANNOTATION_MODELS = [ - AutoRecordTypeSuggestion, - AutoRelevantSuggestion, - URLAutoAgencyIDSubtask, - UserURLTypeSuggestion, - UserRecordTypeSuggestion, - UserURLAgencySuggestion + AnnotationAutoRecordType, + AnnotationAutoURLType, + AnnotationAgencyAutoSubtask, + AnnotationUserURLType, + AnnotationUserRecordType, + AnnotationAgencyUser ] diff --git a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py index d609e2b3..be8a76f9 100644 --- a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py +++ b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py @@ -5,10 +5,10 @@ from src.api.endpoints.metrics.dtos.get.urls.aggregated.pending import 
GetMetricsURLsAggregatedPendingResponseDTO from src.collectors.enums import URLStatus +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.common.annotation_exists_.core import AnnotationExistsCTEQueryBuilder @@ -17,15 +17,15 @@ class PendingAnnotationExistsCTEQueryBuilder(AnnotationExistsCTEQueryBuilder): @property def has_user_relevant_annotation(self): - return self.get_exists_for_model(UserURLTypeSuggestion) + return self.get_exists_for_model(AnnotationUserURLType) @property def has_user_record_type_annotation(self): - return self.get_exists_for_model(UserRecordTypeSuggestion) + return self.get_exists_for_model(AnnotationUserRecordType) @property def has_user_agency_annotation(self): - return self.get_exists_for_model(UserURLAgencySuggestion) + return self.get_exists_for_model(AnnotationAgencyUser) def get_exists_for_model(self, model: Type[URLDependentMixin]): return self.query.c[ diff --git a/src/db/types.py b/src/db/types.py index c224a36c..0ff28637 100644 --- a/src/db/types.py +++ b/src/db/types.py @@ -1,10 +1,10 @@ from typing import TypeVar -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion 
+from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.queries.base.labels import LabelsBase -UserSuggestionType = UserURLAgencySuggestion | UserURLTypeSuggestion | UserRecordTypeSuggestion +UserSuggestionType = AnnotationAgencyUser | AnnotationUserURLType | AnnotationUserRecordType LabelsType = TypeVar("LabelsType", bound=LabelsBase) \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index 49d8bd97..faed2220 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -8,13 +8,13 @@ from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.core.enums import RecordType +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from 
src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @@ -106,19 +106,19 @@ async def test_annotate_all( # Check that all annotations are present in the database # Check URL Type Suggestions - all_relevance_suggestions: list[UserURLTypeSuggestion] = await adb_client.get_all(UserURLTypeSuggestion) + all_relevance_suggestions: list[AnnotationUserURLType] = await adb_client.get_all(AnnotationUserURLType) assert len(all_relevance_suggestions) == 4 suggested_types: set[URLType] = {sugg.type for sugg in all_relevance_suggestions} assert suggested_types == {URLType.DATA_SOURCE, URLType.NOT_RELEVANT} # Should be one agency - all_agency_suggestions = await adb_client.get_all(UserURLAgencySuggestion) + all_agency_suggestions = await adb_client.get_all(AnnotationAgencyUser) assert len(all_agency_suggestions) == 3 suggested_agency_ids: set[int] = {sugg.agency_id for sugg in all_agency_suggestions} assert agency_id in suggested_agency_ids # Should be one record type - all_record_type_suggestions = await adb_client.get_all(UserRecordTypeSuggestion) + all_record_type_suggestions = await adb_client.get_all(AnnotationUserRecordType) assert len(all_record_type_suggestions) == 3 suggested_record_types: set[RecordType] = { sugg.record_type for sugg in all_record_type_suggestions @@ -126,7 +126,7 @@ async def test_annotate_all( assert RecordType.ACCIDENT_REPORTS.value in suggested_record_types # Confirm 3 Location Suggestions, with two belonging to California and one to Pennsylvania - all_location_suggestions = await adb_client.get_all(UserLocationSuggestion) + all_location_suggestions = await 
adb_client.get_all(AnnotationLocationUser) assert len(all_location_suggestions) == 2 location_ids: list[int] = [location_suggestion.location_id for location_suggestion in all_location_suggestions] assert set(location_ids) == {california.location_id, pennsylvania.location_id} @@ -166,7 +166,7 @@ async def test_annotate_all( assert user_suggestion.user_count == 1 # Confirm 3 name suggestions - name_suggestions: list[URLNameSuggestion] = await adb_client.get_all(URLNameSuggestion) + name_suggestions: list[AnnotationNameSuggestion] = await adb_client.get_all(AnnotationNameSuggestion) assert len(name_suggestions) == 3 suggested_names: set[str] = {name_suggestion.suggestion for name_suggestion in name_suggestions} assert "New Name" in suggested_names diff --git a/tests/automated/integration/api/annotate/anonymous/test_core.py b/tests/automated/integration/api/annotate/anonymous/test_core.py index fa39d6f4..48fb4f4d 100644 --- a/tests/automated/integration/api/annotate/anonymous/test_core.py +++ b/tests/automated/integration/api/annotate/anonymous/test_core.py @@ -10,13 +10,13 @@ from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse from src.core.enums import RecordType from src.db.dtos.url.mapping_.simple import SimpleURLMapping +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.anonymous_sessions__name_suggestion import LinkAnonymousSessionNameSuggestion -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import 
AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.models.mixins import URLDependentMixin from tests.automated.integration.api.annotate.anonymous.helper import get_next_url_for_anonymous_annotation, \ post_and_get_next_url_for_anonymous_annotation @@ -81,10 +81,10 @@ async def test_annotate_anonymous( assert post_response_1.next_annotation.url_info.url_id != get_response_1.next_annotation.url_info.url_id for model in [ - AnonymousAnnotationAgency, - AnonymousAnnotationLocation, - AnonymousAnnotationRecordType, - AnonymousAnnotationURLType + AnnotationAgencyAnon, + AnnotationLocationAnon, + AnnotationAnonRecordType, + AnnotationAnonURLType ]: instances: list[URLDependentMixin] = await ddc.adb_client.get_all(model) assert len(instances) == 1 @@ -92,13 +92,13 @@ async def test_annotate_anonymous( assert instance.url_id == get_response_1.next_annotation.url_info.url_id # Check for existence of name suggestion (2 were added by setup) - name_suggestions: list[URLNameSuggestion] = await ddc.adb_client.get_all(URLNameSuggestion) + name_suggestions: list[AnnotationNameSuggestion] = await ddc.adb_client.get_all(AnnotationNameSuggestion) assert len(name_suggestions) == 3 # Check for existence of link - link_instances: list[LinkAnonymousSessionNameSuggestion] = await ddc.adb_client.get_all(LinkAnonymousSessionNameSuggestion) + link_instances: list[AnnotationNameAnonEndorsement] = await ddc.adb_client.get_all(AnnotationNameAnonEndorsement) assert len(link_instances) == 1 - link_instance: 
LinkAnonymousSessionNameSuggestion = link_instances[0] + link_instance: AnnotationNameAnonEndorsement = link_instances[0] assert link_instance.session_id == session_id # Run again without giving session ID, confirm original URL returned diff --git a/tests/automated/integration/api/submit/data_source/test_core.py b/tests/automated/integration/api/submit/data_source/test_core.py index 558327c3..5a8fb103 100644 --- a/tests/automated/integration/api/submit/data_source/test_core.py +++ b/tests/automated/integration/api/submit/data_source/test_core.py @@ -7,6 +7,11 @@ from src.collectors.enums import URLStatus from src.core.enums import RecordType, BatchStatus from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL @@ -15,11 +20,6 @@ from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, UpdateMethodEnum, \ RetentionScheduleEnum, AccessTypeEnum from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from 
src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -96,31 +96,31 @@ async def test_submit_data_source( assert batch_url_link.url_id == url.id # Check for anonymous annotations - url_type_suggestion: AnonymousAnnotationURLType = await adb_client.one_or_none_model(AnonymousAnnotationURLType) + url_type_suggestion: AnnotationAnonURLType = await adb_client.one_or_none_model(AnnotationAnonURLType) assert url_type_suggestion is not None assert url_type_suggestion.url_id == url.id assert url_type_suggestion.url_type == URLType.DATA_SOURCE session_id: UUID = url_type_suggestion.session_id # Check for Location Suggestion - location_suggestion: AnonymousAnnotationLocation = await adb_client.one_or_none_model(AnonymousAnnotationLocation) + location_suggestion: AnnotationLocationAnon = await adb_client.one_or_none_model(AnnotationLocationAnon) assert location_suggestion is not None assert location_suggestion.location_id == pittsburgh_locality.location_id assert location_suggestion.session_id == session_id # Check for Agency Suggestion - agency_suggestion: AnonymousAnnotationAgency = await adb_client.one_or_none_model(AnonymousAnnotationAgency) + agency_suggestion: AnnotationAgencyAnon = await adb_client.one_or_none_model(AnnotationAgencyAnon) assert agency_suggestion is not None assert agency_suggestion.agency_id == test_agency_id assert agency_suggestion.session_id == session_id # Check for Name Suggestion - name_suggestion: URLNameSuggestion = await adb_client.one_or_none_model(URLNameSuggestion) + name_suggestion: AnnotationNameSuggestion = await adb_client.one_or_none_model(AnnotationNameSuggestion) assert name_suggestion is not None assert name_suggestion.suggestion == "Example name" # Check for 
Record Type Suggestion - record_type_suggestion: AnonymousAnnotationRecordType = await adb_client.one_or_none_model(AnonymousAnnotationRecordType) + record_type_suggestion: AnnotationAnonRecordType = await adb_client.one_or_none_model(AnnotationAnonRecordType) assert record_type_suggestion.record_type == RecordType.COMPLAINTS_AND_MISCONDUCT assert record_type_suggestion.session_id == session_id diff --git a/tests/automated/integration/api/submit/test_url_maximal.py b/tests/automated/integration/api/submit/test_url_maximal.py index e57770fb..27fbfe2f 100644 --- a/tests/automated/integration/api/submit/test_url_maximal.py +++ b/tests/automated/integration/api/submit/test_url_maximal.py @@ -5,14 +5,14 @@ from src.api.endpoints.submit.url.models.response import URLSubmissionResponse from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import 
UserRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -56,21 +56,21 @@ async def test_maximal( link: LinkUserSubmittedURL = links[0] assert link.url_id == url_id - agen_suggs: list[UserURLAgencySuggestion] = await adb_client.get_all(UserURLAgencySuggestion) + agen_suggs: list[AnnotationAgencyUser] = await adb_client.get_all(AnnotationAgencyUser) assert len(agen_suggs) == 1 - agen_sugg: UserURLAgencySuggestion = agen_suggs[0] + agen_sugg: AnnotationAgencyUser = agen_suggs[0] assert agen_sugg.url_id == url_id assert agen_sugg.agency_id == agency_id - loc_suggs: list[UserLocationSuggestion] = await adb_client.get_all(UserLocationSuggestion) + loc_suggs: list[AnnotationLocationUser] = await adb_client.get_all(AnnotationLocationUser) assert len(loc_suggs) == 1 - loc_sugg: UserLocationSuggestion = loc_suggs[0] + loc_sugg: AnnotationLocationUser = loc_suggs[0] assert loc_sugg.url_id == url_id assert loc_sugg.location_id == pittsburgh_locality.location_id - name_sugg: list[URLNameSuggestion] = await adb_client.get_all(URLNameSuggestion) + name_sugg: list[AnnotationNameSuggestion] = await adb_client.get_all(AnnotationNameSuggestion) assert len(name_sugg) == 1 - name_sugg: URLNameSuggestion = name_sugg[0] + name_sugg: AnnotationNameSuggestion = name_sugg[0] assert name_sugg.url_id == url_id assert name_sugg.suggestion == "Example URL" assert name_sugg.source == NameSuggestionSource.USER @@ -80,8 +80,8 @@ async def test_maximal( name_link_sugg: LinkUserNameSuggestion = name_link_suggs[0] assert name_link_sugg.suggestion_id == name_sugg.id - rec_suggs: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) + rec_suggs: list[AnnotationUserRecordType] = await 
adb_client.get_all(AnnotationUserRecordType) assert len(rec_suggs) == 1 - rec_sugg: UserRecordTypeSuggestion = rec_suggs[0] + rec_sugg: AnnotationUserRecordType = rec_suggs[0] assert rec_sugg.url_id == url_id assert rec_sugg.record_type == RecordType.INCARCERATION_RECORDS.value diff --git a/tests/automated/integration/api/url/by_id/delete/test_any_url.py b/tests/automated/integration/api/url/by_id/delete/test_any_url.py index 50b3ca0c..49f7a407 100644 --- a/tests/automated/integration/api/url/by_id/delete/test_any_url.py +++ b/tests/automated/integration/api/url/by_id/delete/test_any_url.py @@ -7,6 +7,20 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.enums import ChangeLogOperationType +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from 
src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType from src.db.models.impl.change_log import ChangeLog from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL @@ -15,7 +29,7 @@ from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL @@ -26,24 +40,10 @@ from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.internet_archives.save.sqlalchemy import URLInternetArchivesSaveMetadata from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode, AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from 
src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder @@ -128,34 +128,34 @@ async def _check_results( # ANNOTATIONS ## AUTO ### Agency - URLAutoAgencyIDSubtask, - AgencyIDSubtaskSuggestion, + 
AnnotationAgencyAutoSubtask, + AnnotationAgencyAutoSuggestion, ### Record Type - AutoRecordTypeSuggestion, + AnnotationAutoRecordType, ### URL Type - AutoRelevantSuggestion, + AnnotationAutoURLType, ### Location - AutoLocationIDSubtask, - LocationIDSubtaskSuggestion, + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSuggestion, ## USER ### Agency - UserURLAgencySuggestion, + AnnotationAgencyUser, ### Record Type - UserRecordTypeSuggestion, + AnnotationUserRecordType, ### URL Type - UserURLTypeSuggestion, + AnnotationUserURLType, ### Location - UserLocationSuggestion, - URLNameSuggestion, + AnnotationLocationUser, + AnnotationNameSuggestion, ## ANONYMOUS ### Agency - AnonymousAnnotationAgency, + AnnotationAgencyAnon, ### Location - AnonymousAnnotationLocation, + AnnotationLocationAnon, ### Record Type - AnonymousAnnotationRecordType, + AnnotationAnonRecordType, ### URL Type - AnonymousAnnotationURLType, + AnnotationAnonURLType, ] for model in models: assert await dbc.get_all(model) == [] @@ -316,7 +316,7 @@ async def _setup( ### Agency #### Subtask agency_subtask_id: int = await dbc.add( - URLAutoAgencyIDSubtask( + AnnotationAgencyAutoSubtask( url_id=url.url_id, task_id=task_id, agencies_found=True, @@ -327,7 +327,7 @@ async def _setup( ) ### Suggestion await dbc.add( - AgencyIDSubtaskSuggestion( + AnnotationAgencyAutoSuggestion( subtask_id=agency_subtask_id, agency_id=agency_id, confidence=60 @@ -335,14 +335,14 @@ async def _setup( ) ### Record Type await dbc.add( - AutoRecordTypeSuggestion( + AnnotationAutoRecordType( url_id=url.url_id, record_type=RecordType.BOOKING_REPORTS.value ) ) ### Relevant await dbc.add( - AutoRelevantSuggestion( + AnnotationAutoURLType( url_id=url.url_id, relevant=True, confidence=0.5, @@ -352,7 +352,7 @@ async def _setup( ### Location #### Subtask location_subtask_id: int = await dbc.add( - AutoLocationIDSubtask( + AnnotationLocationAutoSubtask( url_id=url.url_id, task_id=task_id, locations_found=True, @@ -362,7 +362,7 @@ async 
def _setup( ) #### Suggestion await dbc.add( - LocationIDSubtaskSuggestion( + AnnotationLocationAutoSuggestion( subtask_id=location_subtask_id, location_id=pittsburgh_id, confidence=50 @@ -371,7 +371,7 @@ async def _setup( ## USER ### Agency await dbc.add( - UserURLAgencySuggestion( + AnnotationAgencyUser( url_id=url.url_id, user_id=1, agency_id=agency_id, @@ -380,7 +380,7 @@ async def _setup( ) ### Record Type await dbc.add( - UserRecordTypeSuggestion( + AnnotationUserRecordType( url_id=url.url_id, user_id=1, record_type=RecordType.BOOKING_REPORTS.value, @@ -388,7 +388,7 @@ async def _setup( ) ### URL Type await dbc.add( - UserURLTypeSuggestion( + AnnotationUserURLType( url_id=url.url_id, type=URLType.INDIVIDUAL_RECORD, user_id=1 @@ -396,7 +396,7 @@ async def _setup( ) ### Location await dbc.add( - UserLocationSuggestion( + AnnotationLocationUser( url_id=url.url_id, location_id=pittsburgh_id, user_id=1, @@ -404,7 +404,7 @@ async def _setup( ) ### Name name_suggestion_id: int = await dbc.add( - URLNameSuggestion( + AnnotationNameSuggestion( url_id=url.url_id, suggestion="Test Name", source=NameSuggestionSource.USER, @@ -423,25 +423,25 @@ async def _setup( ## ANONYMOUS for model in [ ### Agency - AnonymousAnnotationAgency( + AnnotationAgencyAnon( url_id=url.url_id, agency_id=agency_id, session_id=session_id, ), ### Record Type - AnonymousAnnotationRecordType( + AnnotationAnonRecordType( url_id=url.url_id, record_type=RecordType.BOOKING_REPORTS.value, session_id=session_id, ), ### URL Type - AnonymousAnnotationURLType( + AnnotationAnonURLType( url_id=url.url_id, url_type=URLType.INDIVIDUAL_RECORD, session_id=session_id, ), ### Location - AnonymousAnnotationLocation( + AnnotationLocationAnon( url_id=url.url_id, location_id=pittsburgh_id, session_id=session_id diff --git a/tests/automated/integration/readonly/setup/annotations.py b/tests/automated/integration/readonly/setup/annotations.py index b07bbd9f..ab2ef13e 100644 --- 
a/tests/automated/integration/readonly/setup/annotations.py +++ b/tests/automated/integration/readonly/setup/annotations.py @@ -1,13 +1,13 @@ from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType async def add_full_data_sources_annotations( @@ -17,7 +17,7 @@ async def add_full_data_sources_annotations( location_id: int, adb_client: AsyncDatabaseClient ) -> None: - name_suggestion = URLNameSuggestion( + name_suggestion = AnnotationNameSuggestion( url_id=url_id, suggestion="Name suggestion", source=NameSuggestionSource.USER @@ -26,12 +26,12 @@ async def add_full_data_sources_annotations( name_suggestion, return_id=True ) - url_type_suggestion = 
UserURLTypeSuggestion( + url_type_suggestion = AnnotationUserURLType( url_id=url_id, user_id=user_id, type=URLType.DATA_SOURCE ) - record_type_suggestion = UserRecordTypeSuggestion( + record_type_suggestion = AnnotationUserRecordType( user_id=user_id, url_id=url_id, record_type=RecordType.RECORDS_REQUEST_INFO.value @@ -40,12 +40,12 @@ async def add_full_data_sources_annotations( user_id=user_id, suggestion_id=name_suggestion_id, ) - agency_suggestion = UserURLAgencySuggestion( + agency_suggestion = AnnotationAgencyUser( agency_id=agency_id, url_id=url_id, user_id=user_id, ) - location_suggestion = UserLocationSuggestion( + location_suggestion = AnnotationLocationUser( location_id=location_id, url_id=url_id, user_id=user_id, @@ -64,7 +64,7 @@ async def add_minimal_not_relevant_annotation( user_id: int, adb_client: AsyncDatabaseClient ) -> None: - url_type_suggestion = UserURLTypeSuggestion( + url_type_suggestion = AnnotationUserURLType( url_id=url_id, user_id=user_id, type=URLType.NOT_RELEVANT diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/batch_link/test_core.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/batch_link/test_core.py index b39d74ca..e838ee3e 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/batch_link/test_core.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/batch_link/test_core.py @@ -2,10 +2,10 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.impl.link.agency_batch.sqlalchemy import 
LinkAgencyBatch -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters from tests.helpers.data_creator.core import DBDataCreator @@ -49,14 +49,14 @@ async def test_batch_link_subtask( assert not await operator.meets_task_prerequisites() assert operator._subtask is None - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 2 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.BATCH_LINK assert subtask.agencies_found - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all(AgencyIDSubtaskSuggestion) + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all(AnnotationAgencyAutoSuggestion) assert len(suggestions) == 2 assert all(sugg.confidence == 80 for sugg in suggestions) diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/ckan/test_core.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/ckan/test_core.py index 4ec99967..a1ba703f 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/ckan/test_core.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/ckan/test_core.py @@ -4,9 +4,9 @@ from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.url.operators.agency_identification.core import 
AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from tests.helpers.asserts import assert_task_run_success from tests.helpers.data_creator.core import DBDataCreator @@ -57,15 +57,15 @@ async def test_ckan_subtask( assert operator._subtask is None # Verify results - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 1 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.CKAN assert subtask.url_id == applicable_url_id subtask_id: int = subtask.id - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all( - AgencyIDSubtaskSuggestion + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all( + AnnotationAgencyAutoSuggestion ) assert len(suggestions) == 2 assert {suggestion.agency_id for suggestion in suggestions} == { diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py index 7575f37e..7e72b733 100644 --- 
a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py @@ -6,10 +6,10 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping_.simple import SimpleURLMapping +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.helpers.data_creator.core import DBDataCreator @@ -113,17 +113,17 @@ async def test_homepage_match( adb_client: AsyncDatabaseClient = db_data_creator.adb_client # Confirm presence of subtasks - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 2 # Confirm both listed as agencies found assert all(subtask.agencies_found for subtask in subtasks) - url_id_to_subtask: dict[int, URLAutoAgencyIDSubtask] = { + url_id_to_subtask: dict[int, AnnotationAgencyAutoSubtask] = { subtask.url_id: subtask for subtask in subtasks } - single_subtask: URLAutoAgencyIDSubtask = 
url_id_to_subtask[single_url_id] - multi_subtask: URLAutoAgencyIDSubtask = url_id_to_subtask[multi_url_id] + single_subtask: AnnotationAgencyAutoSubtask = url_id_to_subtask[single_url_id] + multi_subtask: AnnotationAgencyAutoSubtask = url_id_to_subtask[multi_url_id] # Check subtasks have expected detail codes assert single_subtask.detail == SubtaskDetailCode.HOMEPAGE_SINGLE_AGENCY @@ -131,16 +131,16 @@ async def test_homepage_match( # Get suggestions - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all(AgencyIDSubtaskSuggestion) + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all(AnnotationAgencyAutoSuggestion) assert len(suggestions) == 3 # Confirm each suggestion properly linked to expected subtask - subtask_id_to_suggestions: dict[int, list[AgencyIDSubtaskSuggestion]] = defaultdict(list) + subtask_id_to_suggestions: dict[int, list[AnnotationAgencyAutoSuggestion]] = defaultdict(list) for suggestion in suggestions: subtask_id_to_suggestions[suggestion.subtask_id].append(suggestion) # Check Single Agency Case Suggestion - single_suggestion: AgencyIDSubtaskSuggestion = \ + single_suggestion: AnnotationAgencyAutoSuggestion = \ subtask_id_to_suggestions[single_subtask.id][0] # Check Single Agency Case Suggestion has expected agency assert single_suggestion.agency_id == single_agency_id @@ -148,7 +148,7 @@ async def test_homepage_match( assert single_suggestion.confidence == 95 # Check Multi Agency Case Suggestion - multi_suggestions: list[AgencyIDSubtaskSuggestion] = subtask_id_to_suggestions[multi_subtask.id] + multi_suggestions: list[AnnotationAgencyAutoSuggestion] = subtask_id_to_suggestions[multi_subtask.id] # Check Multi Agency Case Suggestion has expected agencies assert {suggestion.agency_id for suggestion in multi_suggestions} \ == set(multi_agency_ids) diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/muckrock/test_core.py 
b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/muckrock/test_core.py index af41354d..aa38b33b 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/muckrock/test_core.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/muckrock/test_core.py @@ -6,9 +6,9 @@ from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from tests.helpers.asserts import assert_task_run_success from tests.helpers.data_creator.core import DBDataCreator @@ -93,15 +93,15 @@ async def test_muckrock_subtask( assert operator._subtask is None # Verify results - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 1 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.MUCKROCK assert subtask.url_id == applicable_url_id subtask_id: int = subtask.id - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all( - AgencyIDSubtaskSuggestion + suggestions: list[AnnotationAgencyAutoSuggestion] = await 
adb_client.get_all( + AnnotationAgencyAutoSuggestion ) assert len(suggestions) == 2 assert {suggestion.agency_id for suggestion in suggestions} == { diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_multi_agency_location.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_multi_agency_location.py index 3da841a1..0df07b79 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_multi_agency_location.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_multi_agency_location.py @@ -2,9 +2,9 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo from tests.helpers.run import run_task_and_confirm_success @@ -53,16 +53,16 @@ async def test_multi_agency_location( assert not await operator.meets_task_prerequisites() # Check for presence of subtask - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = 
await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 1 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH # Confirm subtask lists agencies found assert subtask.agencies_found # Confirm multiple agency suggestions in database - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all(AgencyIDSubtaskSuggestion) + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all(AnnotationAgencyAutoSuggestion) assert len(suggestions) == 2 # Confirm confidence of location suggestion is distributed evenly among agency suggestions diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_single_agency_location.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_single_agency_location.py index ecec3071..6e1ef42d 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_single_agency_location.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_single_agency_location.py @@ -2,9 +2,9 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from 
src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -59,18 +59,18 @@ async def test_single_agency_location( assert not await operator.meets_task_prerequisites() # Check for presence of subtask - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 1 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH # Confirm subtask lists agencies found assert subtask.agencies_found # Confirm single agency suggestion in database - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all(AgencyIDSubtaskSuggestion) + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all(AnnotationAgencyAutoSuggestion) assert len(suggestions) == 1 # Confirm confidence of agency suggestion equal to location suggestion - suggestion: AgencyIDSubtaskSuggestion = suggestions[0] + suggestion: AnnotationAgencyAutoSuggestion = suggestions[0] assert suggestion.confidence == 68 diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py b/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py index 8ace042e..74e31306 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py @@ -2,7 +2,7 @@ from src.collectors.enums import CollectorType from src.core.tasks.url.operators.agency_identification.core 
import AgencyIdentificationTaskOperator -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from tests.helpers.data_creator.core import DBDataCreator @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/url/impl/auto_name/test_core.py b/tests/automated/integration/tasks/url/impl/auto_name/test_core.py index c0500d99..66c09017 100644 --- a/tests/automated/integration/tasks/url/impl/auto_name/test_core.py +++ b/tests/automated/integration/tasks/url/impl/auto_name/test_core.py @@ -1,8 +1,8 @@ import pytest from src.core.tasks.url.operators.auto_name.core import AutoNameURLTaskOperator -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.run import run_task_and_confirm_success @@ -31,9 +31,9 @@ async def test_core( assert not await operator.meets_task_prerequisites() # Confirm suggestion was added - suggestions: list[URLNameSuggestion] = await db_data_creator.adb_client.get_all(URLNameSuggestion) + suggestions: list[AnnotationNameSuggestion] = await db_data_creator.adb_client.get_all(AnnotationNameSuggestion) assert len(suggestions) == 1 - suggestion: URLNameSuggestion = suggestions[0] + suggestion: AnnotationNameSuggestion = suggestions[0] assert suggestion.url_id == url_id assert suggestion.suggestion == "test html content" assert suggestion.source == NameSuggestionSource.HTML_METADATA_TITLE \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py 
b/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py index 3f4873f4..c9236f6c 100644 --- a/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py +++ b/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py @@ -1,11 +1,7 @@ -from collections import Counter - import pytest -from src.collectors.enums import URLStatus from src.core.tasks.url.operators.auto_relevant.core import URLAutoRelevantTaskOperator -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_prereqs_met from tests.automated.integration.tasks.url.impl.auto_relevant.setup import setup_operator, setup_urls @@ -31,7 +27,7 @@ async def test_url_auto_relevant_task(db_data_creator: DBDataCreator): adb_client = db_data_creator.adb_client # Confirm two annotations were created - suggestions: list[AutoRelevantSuggestion] = await adb_client.get_all(AutoRelevantSuggestion) + suggestions: list[AnnotationAutoURLType] = await adb_client.get_all(AnnotationAutoURLType) assert len(suggestions) == 2 for suggestion in suggestions: assert suggestion.url_id in url_ids diff --git a/tests/automated/integration/tasks/url/impl/location_identification/subtasks/batch_link/test_core.py b/tests/automated/integration/tasks/url/impl/location_identification/subtasks/batch_link/test_core.py index ab505627..0c5238ae 100644 --- a/tests/automated/integration/tasks/url/impl/location_identification/subtasks/batch_link/test_core.py +++ b/tests/automated/integration/tasks/url/impl/location_identification/subtasks/batch_link/test_core.py @@ -2,10 +2,10 @@ from src.core.tasks.url.operators.location_id.core import LocationIdentificationTaskOperator from 
src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters from tests.helpers.data_creator.core import DBDataCreator @@ -51,13 +51,13 @@ async def test_batch_link_subtask( assert not await operator.meets_task_prerequisites() assert operator._subtask is None - subtasks: list[AutoLocationIDSubtask] = await adb_client.get_all(AutoLocationIDSubtask) + subtasks: list[AnnotationLocationAutoSubtask] = await adb_client.get_all(AnnotationLocationAutoSubtask) assert len(subtasks) == 2 - subtask: AutoLocationIDSubtask = subtasks[0] + subtask: AnnotationLocationAutoSubtask = subtasks[0] assert subtask.type == LocationIDSubtaskType.BATCH_LINK assert subtask.locations_found - suggestions: list[LocationIDSubtaskSuggestion] = await adb_client.get_all(LocationIDSubtaskSuggestion) + suggestions: list[AnnotationLocationAutoSuggestion] = await adb_client.get_all(AnnotationLocationAutoSuggestion) assert len(suggestions) == 2 assert all(sugg.confidence == 80 for sugg in suggestions) diff --git a/tests/automated/integration/tasks/url/impl/location_identification/subtasks/nlp_location_frequency/end_to_end/test_core.py 
b/tests/automated/integration/tasks/url/impl/location_identification/subtasks/nlp_location_frequency/end_to_end/test_core.py index f8f0c821..8d8bd7c6 100644 --- a/tests/automated/integration/tasks/url/impl/location_identification/subtasks/nlp_location_frequency/end_to_end/test_core.py +++ b/tests/automated/integration/tasks/url/impl/location_identification/subtasks/nlp_location_frequency/end_to_end/test_core.py @@ -9,11 +9,11 @@ from src.core.tasks.url.operators.location_id.subtasks.models.subtask import AutoLocationIDSubtaskData from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.link.task_url import LinkTaskURL -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from tests.helpers.asserts import assert_task_run_success from tests.helpers.data_creator.core import DBDataCreator @@ -90,7 +90,7 @@ async def mock_process_inputs( assert {task_link.task_id for task_link in task_links} == {operator._task_id} # Confirm two subtasks were created - subtasks: list[AutoLocationIDSubtask] = await 
adb_client.get_all(AutoLocationIDSubtask) + subtasks: list[AnnotationLocationAutoSubtask] = await adb_client.get_all(AnnotationLocationAutoSubtask) assert len(subtasks) == 2 assert {subtask.url_id for subtask in subtasks} == set(url_ids) assert {subtask.task_id for subtask in subtasks} == {operator._task_id} @@ -108,7 +108,7 @@ async def mock_process_inputs( assert error_infos[0].error == "Test error" # Confirm two suggestions for happy path URL id - suggestions: list[LocationIDSubtaskSuggestion] = await adb_client.get_all(LocationIDSubtaskSuggestion) + suggestions: list[AnnotationLocationAutoSuggestion] = await adb_client.get_all(AnnotationLocationAutoSuggestion) assert len(suggestions) == 2 # Confirm expected agency ids assert {suggestion.location_id for suggestion in suggestions} == { diff --git a/tests/automated/integration/tasks/url/impl/location_identification/survey/test_survey_flag.py b/tests/automated/integration/tasks/url/impl/location_identification/survey/test_survey_flag.py index 338c604b..a5dca740 100644 --- a/tests/automated/integration/tasks/url/impl/location_identification/survey/test_survey_flag.py +++ b/tests/automated/integration/tasks/url/impl/location_identification/survey/test_survey_flag.py @@ -1,7 +1,7 @@ import pytest from src.core.tasks.url.operators.location_id.core import LocationIdentificationTaskOperator -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py b/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py index 57f41ded..d9f1de4f 100644 --- a/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py +++ b/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py @@ -3,7 +3,7 @@ import pytest from src.db.enums 
import TaskType -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.operators.record_type.core import URLRecordTypeTaskOperator from src.core.enums import RecordType @@ -49,7 +49,7 @@ async def test_url_record_type_task(db_data_creator: DBDataCreator): assert task.url_error_count == 1 # Get metadata - suggestions = await db_data_creator.adb_client.get_all(AutoRecordTypeSuggestion) + suggestions = await db_data_creator.adb_client.get_all(AnnotationAutoRecordType) for suggestion in suggestions: assert suggestion.record_type == RecordType.ACCIDENT_REPORTS.value diff --git a/tests/automated/integration/tasks/url/impl/validate/helper.py b/tests/automated/integration/tasks/url/impl/validate/helper.py index 091fe5fa..ec9901dd 100644 --- a/tests/automated/integration/tasks/url/impl/validate/helper.py +++ b/tests/automated/integration/tasks/url/impl/validate/helper.py @@ -3,13 +3,13 @@ from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.flag.auto_validated.sqlalchemy import FlagURLAutoValidated from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder from tests.conftest import 
db_data_creator from tests.helpers.counter import next_int diff --git a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py index 434e8f06..d99e4448 100644 --- a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py +++ b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py @@ -12,12 +12,12 @@ from src.core.enums import RecordType from src.core.tasks.url.operators.validate.core import AutoValidateURLTaskOperator +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.anonymous_sessions__name_suggestion import LinkAnonymousSessionNameSuggestion -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from tests.automated.integration.tasks.url.impl.validate.helper import TestValidateTaskHelper, DEFAULT_RECORD_TYPE from tests.helpers.run import run_task_and_confirm_success @@ -55,27 +55,27 @@ async def test_data_source( session_id_2: UUID = await helper.get_anonymous_session_id() for session_id in [session_id_1, session_id_2]: - anon_url_type = AnonymousAnnotationURLType( + anon_url_type = 
AnnotationAnonURLType( url_type=URLType.DATA_SOURCE, session_id=session_id, url_id=helper.url_id ) - anon_record_type = AnonymousAnnotationRecordType( + anon_record_type = AnnotationAnonRecordType( record_type=DEFAULT_RECORD_TYPE, session_id=session_id, url_id=helper.url_id ) - anon_location = AnonymousAnnotationLocation( + anon_location = AnnotationLocationAnon( location_id=helper.location_id, session_id=session_id, url_id=helper.url_id ) - anon_agency = AnonymousAnnotationAgency( + anon_agency = AnnotationAgencyAnon( agency_id=helper.agency_id, session_id=session_id, url_id=helper.url_id ) - anon_name_link = LinkAnonymousSessionNameSuggestion( + anon_name_link = AnnotationNameAnonEndorsement( suggestion_id=suggestion_id, session_id=session_id ) @@ -101,7 +101,7 @@ async def test_data_source( # Add tiebreaker -- a single anonymous vote session_id_3: UUID = await helper.get_anonymous_session_id() - anon_record_type = AnonymousAnnotationRecordType( + anon_record_type = AnnotationAnonRecordType( record_type=DEFAULT_RECORD_TYPE, session_id=session_id_3, url_id=helper.url_id diff --git a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py index fe54c6f9..ab29a817 100644 --- a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py +++ b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py @@ -3,11 +3,10 @@ from typing_extensions import override from src.core.enums import SuggestionType -from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic -from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from 
src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.agency import AgencyCommand diff --git a/tests/helpers/data_creator/commands/impl/suggestion/auto/relevant.py b/tests/helpers/data_creator/commands/impl/suggestion/auto/relevant.py index d85b5a1b..498f736c 100644 --- a/tests/helpers/data_creator/commands/impl/suggestion/auto/relevant.py +++ b/tests/helpers/data_creator/commands/impl/suggestion/auto/relevant.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput +from src.db.models.impl.annotation.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index dd08a178..1d71b271 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -14,23 +14,23 @@ from src.db.enums import TaskType from src.db.models.impl.agency.enums import AgencyType, JurisdictionType from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from 
src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.duplicate.pydantic.insert import DuplicateInsertInfo from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters @@ -638,7 +638,7 @@ async def add_user_location_suggestion( user_id: int, location_id: int, ): - suggestion = 
UserLocationSuggestion( + suggestion = AnnotationLocationUser( url_id=url_id, user_id=user_id, location_id=location_id, @@ -654,7 +654,7 @@ async def add_location_suggestion( ) -> None: locations_found: bool = len(location_ids) > 0 task_id: int = await self.task(url_ids=[url_id]) - subtask = AutoLocationIDSubtask( + subtask = AnnotationLocationAutoSubtask( url_id=url_id, type=type_, task_id=task_id, @@ -663,9 +663,9 @@ async def add_location_suggestion( subtask_id: int = await self.adb_client.add(subtask, return_id=True) if not locations_found: return - suggestions: list[LocationIDSubtaskSuggestion] = [] + suggestions: list[AnnotationLocationAutoSuggestion] = [] for location_id in location_ids: - suggestion = LocationIDSubtaskSuggestion( + suggestion = AnnotationLocationAutoSuggestion( subtask_id=subtask_id, location_id=location_id, confidence=confidence @@ -695,7 +695,7 @@ async def name_suggestion( ) -> int: if name is None: name = f"Test Name {next_int()}" - suggestion = URLNameSuggestion( + suggestion = AnnotationNameSuggestion( url_id=url_id, source=source, suggestion=name, From 01049eb355b39bfabd54e4c457a61e9bea05fe6d Mon Sep 17 00:00:00 2001 From: Max Chis Date: Sat, 20 Dec 2025 09:19:04 -0500 Subject: [PATCH 09/24] Refactor query --- .../annotate/_shared/queries/helper.py | 24 ++++++++++++++----- .../annotate/all/get/queries/core.py | 18 ++------------ .../endpoints/annotate/anonymous/get/query.py | 20 +--------------- 3 files changed, 21 insertions(+), 41 deletions(-) diff --git a/src/api/endpoints/annotate/_shared/queries/helper.py b/src/api/endpoints/annotate/_shared/queries/helper.py index f5bf55eb..3f3745f5 100644 --- a/src/api/endpoints/annotate/_shared/queries/helper.py +++ b/src/api/endpoints/annotate/_shared/queries/helper.py @@ -2,9 +2,12 @@ This module contains helper functions for the annotate GET queries """ -from sqlalchemy import Select, case +from sqlalchemy import Select, case, exists, select from sqlalchemy.orm import joinedload +from 
src.collectors.enums import URLStatus +from src.db.helpers.query import exists_url, not_exists_url +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.views.unvalidated_url import UnvalidatedURL @@ -15,11 +18,7 @@ def get_select() -> Select: return ( Select(URL) - # URL Must be unvalidated - .join( - UnvalidatedURL, - UnvalidatedURL.url_id == URL.id - ) + .join( URLAnnotationFlagsView, URLAnnotationFlagsView.url_id == URL.id @@ -31,6 +30,19 @@ def get_select() -> Select: ) def conclude(query: Select) -> Select: + # Add common where conditions + query = query.where( + URL.status == URLStatus.OK.value, + not_exists_url( + FlagURLSuspended + ), + # URL Must be unvalidated + exists_url( + UnvalidatedURL + ) + ) + + query = ( # Add load options query.options( diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index 8f4fe7a9..efaf0ce8 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -4,17 +4,12 @@ from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result from src.api.endpoints.annotate._shared.queries import helper from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse -from src.collectors.enums import URLStatus from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser -from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType from 
src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType -from src.db.models.views.unvalidated_url import UnvalidatedURL -from src.db.models.views.url_anno_count import URLAnnotationCount -from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase @@ -45,7 +40,6 @@ async def run( query = ( query .where( - URL.status == URLStatus.OK.value, # Must not have been previously annotated by user ~exists( select(AnnotationUserURLType.url_id) @@ -78,14 +72,6 @@ async def run( AnnotationUserRecordType.url_id == URL.id, AnnotationUserRecordType.user_id == self.user_id, ) - ), - ~exists( - select( - FlagURLSuspended.url_id - ) - .where( - FlagURLSuspended.url_id == URL.id, - ) ) ) ) diff --git a/src/api/endpoints/annotate/anonymous/get/query.py b/src/api/endpoints/annotate/anonymous/get/query.py index a7b96c1e..b9097ac3 100644 --- a/src/api/endpoints/annotate/anonymous/get/query.py +++ b/src/api/endpoints/annotate/anonymous/get/query.py @@ -1,27 +1,18 @@ -from typing import Any from uuid import UUID -from sqlalchemy import Select, func, exists, select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import joinedload from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result +from src.api.endpoints.annotate._shared.queries import helper from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.anonymous.get.helpers import not_exists_anon_annotation from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse -from src.collectors.enums import URLStatus -from src.db.helpers.query import not_exists_url from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon 
from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType -from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.views.unvalidated_url import UnvalidatedURL -from src.db.models.views.url_anno_count import URLAnnotationCount -from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import QueryBuilderBase -from src.api.endpoints.annotate._shared.queries import helper class GetNextURLForAnonymousAnnotationQueryBuilder(QueryBuilderBase): @@ -40,7 +31,6 @@ async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationRe query = ( query .where( - URL.status == URLStatus.OK.value, # Must not have been previously annotated by user not_exists_anon_annotation( session_id=self.session_id, @@ -57,14 +47,6 @@ async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationRe not_exists_anon_annotation( session_id=self.session_id, anon_model=AnnotationAgencyAnon - ), - ~exists( - select( - FlagURLSuspended.url_id - ) - .where( - FlagURLSuspended.url_id == URL.id, - ) ) ) ) From 74a07a50e69784681ecf8eb7d1f52747ebd98873 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Sat, 20 Dec 2025 17:04:48 -0500 Subject: [PATCH 10/24] Add annotation migrate endpoint --- .../annotate/all/get/queries/convert.py | 8 +- .../annotate/all/get/queries/core.py | 16 +-- .../annotate/all/get/queries/name/core.py | 10 +- .../endpoints/annotate/all/post/requester.py | 14 +- .../endpoints/annotate/anonymous/get/query.py | 8 +- .../annotate/anonymous/post/query.py | 8 +- .../endpoints/annotate/migrate/__init__.py | 0 src/api/endpoints/annotate/migrate/query.py | 131 ++++++++++++++++++ 
src/api/endpoints/annotate/routes.py | 22 ++- .../contributions/shared/contributions.py | 6 +- .../user/queries/agreement/record_type.py | 12 +- .../user/queries/agreement/url_type.py | 12 +- .../user/queries/annotated_and_validated.py | 10 +- .../metrics/urls/breakdown/query/core.py | 12 +- .../submit/data_source/queries/core.py | 8 +- src/api/endpoints/submit/url/queries/core.py | 8 +- .../validate/queries/ctes/counts/impl/name.py | 6 +- .../queries/ctes/counts/impl/record_type.py | 20 +-- .../queries/ctes/counts/impl/url_type.py | 20 +-- src/db/client/async_.py | 12 +- src/db/client/types.py | 6 +- src/db/constants.py | 8 +- src/db/dto_converter.py | 8 +- .../impl/annotation/name/user/pydantic.py | 6 +- .../impl/annotation/name/user/sqlalchemy.py | 2 +- .../annotation/record_type/anon/sqlalchemy.py | 2 +- .../impl/annotation/record_type/user/user.py | 2 +- .../annotation/url_type/anon/sqlalchemy.py | 2 +- .../annotation/url_type/user/sqlalchemy.py | 2 +- src/db/models/impl/url/core/sqlalchemy.py | 8 +- .../common/annotation_exists_/constants.py | 8 +- .../core/metrics/urls/aggregated/pending.py | 8 +- src/db/types.py | 6 +- .../api/annotate/all/test_happy_path.py | 12 +- .../api/annotate/anonymous/test_core.py | 64 ++++++++- .../api/submit/data_source/test_core.py | 8 +- .../api/submit/test_url_maximal.py | 12 +- .../api/url/by_id/delete/test_any_url.py | 28 ++-- .../integration/readonly/setup/annotations.py | 14 +- .../url/impl/validate/test_data_source.py | 10 +- tests/helpers/data_creator/core.py | 4 +- 41 files changed, 385 insertions(+), 178 deletions(-) create mode 100644 src/api/endpoints/annotate/migrate/__init__.py create mode 100644 src/api/endpoints/annotate/migrate/query.py diff --git a/src/api/endpoints/annotate/all/get/queries/convert.py b/src/api/endpoints/annotate/all/get/queries/convert.py index 0b0f0791..80625d3c 100644 --- a/src/api/endpoints/annotate/all/get/queries/convert.py +++ b/src/api/endpoints/annotate/all/get/queries/convert.py @@ 
-5,12 +5,12 @@ from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser def convert_user_url_type_suggestion_to_url_type_annotation_suggestion( - db_suggestions: list[AnnotationUserURLType] + db_suggestions: list[AnnotationURLTypeUser] ) -> list[URLTypeAnnotationSuggestion]: counter: Counter[URLType] = Counter() for suggestion in db_suggestions: @@ -26,7 +26,7 @@ def convert_user_url_type_suggestion_to_url_type_annotation_suggestion( return anno_suggestions def convert_user_record_type_suggestion_to_record_type_annotation_suggestion( - db_suggestions: list[AnnotationUserRecordType] + db_suggestions: list[AnnotationRecordTypeUser] ) -> RecordTypeAnnotationResponseOuterInfo: counter: Counter[RecordType] = Counter() for suggestion in db_suggestions: diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index efaf0ce8..852886c6 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -6,8 +6,8 @@ from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from 
src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase @@ -42,10 +42,10 @@ async def run( .where( # Must not have been previously annotated by user ~exists( - select(AnnotationUserURLType.url_id) + select(AnnotationURLTypeUser.url_id) .where( - AnnotationUserURLType.url_id == URL.id, - AnnotationUserURLType.user_id == self.user_id, + AnnotationURLTypeUser.url_id == URL.id, + AnnotationURLTypeUser.user_id == self.user_id, ) ), ~exists( @@ -66,11 +66,11 @@ async def run( ), ~exists( select( - AnnotationUserRecordType.url_id + AnnotationRecordTypeUser.url_id ) .where( - AnnotationUserRecordType.url_id == URL.id, - AnnotationUserRecordType.user_id == self.user_id, + AnnotationRecordTypeUser.url_id == URL.id, + AnnotationRecordTypeUser.user_id == self.user_id, ) ) ) diff --git a/src/api/endpoints/annotate/all/get/queries/name/core.py b/src/api/endpoints/annotate/all/get/queries/name/core.py index 9eba70ee..3cc1324d 100644 --- a/src/api/endpoints/annotate/all/get/queries/name/core.py +++ b/src/api/endpoints/annotate/all/get/queries/name/core.py @@ -7,7 +7,7 @@ from src.db.helpers.session import session_helper as sh from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.queries.base.builder import QueryBuilderBase @@ -26,7 +26,7 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: AnnotationNameSuggestion.id.label('id'), 
AnnotationNameSuggestion.suggestion.label('display_name'), func.count( - LinkUserNameSuggestion.user_id + AnnotationNameUserEndorsement.user_id ).label('user_count'), case( (AnnotationNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1), @@ -34,8 +34,8 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: ).label("robo_count") ) .outerjoin( - LinkUserNameSuggestion, - LinkUserNameSuggestion.suggestion_id == AnnotationNameSuggestion.id, + AnnotationNameUserEndorsement, + AnnotationNameUserEndorsement.suggestion_id == AnnotationNameSuggestion.id, ) .where( AnnotationNameSuggestion.url_id == self.url_id, @@ -45,7 +45,7 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: AnnotationNameSuggestion.suggestion, ) .order_by( - func.count(LinkUserNameSuggestion.user_id).desc(), + func.count(AnnotationNameUserEndorsement.user_id).desc(), AnnotationNameSuggestion.id.asc(), ) .limit(3) diff --git a/src/api/endpoints/annotate/all/post/requester.py b/src/api/endpoints/annotate/all/post/requester.py index 2034ecc1..e0119235 100644 --- a/src/api/endpoints/annotate/all/post/requester.py +++ b/src/api/endpoints/annotate/all/post/requester.py @@ -7,11 +7,11 @@ from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from 
src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.templates.requester import RequesterBase @@ -33,7 +33,7 @@ def optionally_add_record_type( ) -> None: if rt is None: return - record_type_suggestion = AnnotationUserRecordType( + record_type_suggestion = AnnotationRecordTypeUser( url_id=self.url_id, user_id=self.user_id, record_type=rt.value @@ -44,7 +44,7 @@ def add_relevant_annotation( self, url_type: URLType, ) -> None: - relevant_suggestion = AnnotationUserURLType( + relevant_suggestion = AnnotationURLTypeUser( url_id=self.url_id, user_id=self.user_id, type=url_type @@ -77,7 +77,7 @@ async def optionally_add_name_suggestion( if name_info.empty: return if name_info.existing_name_id is not None: - link = LinkUserNameSuggestion( + link = AnnotationNameUserEndorsement( user_id=self.user_id, suggestion_id=name_info.existing_name_id, ) @@ -90,7 +90,7 @@ async def optionally_add_name_suggestion( ) self.session.add(name_suggestion) await self.session.flush() - link = LinkUserNameSuggestion( + link = AnnotationNameUserEndorsement( user_id=self.user_id, suggestion_id=name_suggestion.id, ) diff --git a/src/api/endpoints/annotate/anonymous/get/query.py b/src/api/endpoints/annotate/anonymous/get/query.py index b9097ac3..684df2f5 100644 --- a/src/api/endpoints/annotate/anonymous/get/query.py +++ b/src/api/endpoints/annotate/anonymous/get/query.py @@ -9,8 +9,8 @@ from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon -from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType -from 
src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase @@ -34,11 +34,11 @@ async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationRe # Must not have been previously annotated by user not_exists_anon_annotation( session_id=self.session_id, - anon_model=AnnotationAnonURLType + anon_model=AnnotationURLTypeAnon ), not_exists_anon_annotation( session_id=self.session_id, - anon_model=AnnotationAnonRecordType + anon_model=AnnotationRecordTypeAnon ), not_exists_anon_annotation( session_id=self.session_id, diff --git a/src/api/endpoints/annotate/anonymous/post/query.py b/src/api/endpoints/annotate/anonymous/post/query.py index 50ebad7c..a4f0cebf 100644 --- a/src/api/endpoints/annotate/anonymous/post/query.py +++ b/src/api/endpoints/annotate/anonymous/post/query.py @@ -7,8 +7,8 @@ from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType -from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.queries.base.builder import QueryBuilderBase @@ -27,7 +27,7 @@ def __init__( async def run(self, session: AsyncSession) 
-> None: - url_type_suggestion = AnnotationAnonURLType( + url_type_suggestion = AnnotationURLTypeAnon( url_id=self.url_id, url_type=self.post_info.suggested_status, session_id=self.session_id @@ -57,7 +57,7 @@ async def run(self, session: AsyncSession) -> None: session.add(name_suggestion) if self.post_info.record_type is not None: - record_type_suggestion = AnnotationAnonRecordType( + record_type_suggestion = AnnotationRecordTypeAnon( url_id=self.url_id, record_type=self.post_info.record_type, session_id=self.session_id diff --git a/src/api/endpoints/annotate/migrate/__init__.py b/src/api/endpoints/annotate/migrate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/annotate/migrate/query.py b/src/api/endpoints/annotate/migrate/query.py new file mode 100644 index 00000000..327f43ce --- /dev/null +++ b/src/api/endpoints/annotate/migrate/query.py @@ -0,0 +1,131 @@ +from typing import Any +from uuid import UUID + +from sqlalchemy import insert, select, delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser +from 
src.db.queries.base.builder import QueryBuilderBase + + +class MigrateAnonymousAnnotationsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + session_id: UUID, + user_id: int + ): + super().__init__() + self.session_id = session_id + self.user_id = user_id + + async def run(self, session: AsyncSession) -> Any: + await self.migrate_agency_annotations(session) + await self.migrate_location_annotations(session) + await self.migrate_record_type_annotations(session) + await self.migrate_url_type_annotations(session) + await self.migrate_name_annotations(session) + + async def migrate_agency_annotations(self, session: AsyncSession) -> None: + # Copy all agency annotations from anonymous to user. + statement = insert(AnnotationAgencyUser).from_select( + ["agency_id", "url_id", "user_id"], + select( + AnnotationAgencyAnon.agency_id, + AnnotationAgencyAnon.url_id, + self.user_id + ).where( + AnnotationAgencyAnon.session_id == self.session_id + ) + ) + await session.execute(statement) + # Delete all anonymous agency annotations. + statement = delete(AnnotationAgencyAnon).where( + AnnotationAgencyAnon.session_id == self.session_id + ) + await session.execute(statement) + + + async def migrate_location_annotations(self, session: AsyncSession) -> None: + # Copy all location annotations from anonymous to user. + statement = insert(AnnotationLocationUser).from_select( + ['location_id', 'url_id', 'user_id'], + select( + AnnotationLocationAnon.location_id, + AnnotationLocationAnon.url_id, + self.user_id + ).where( + AnnotationLocationAnon.session_id == self.session_id + ) + ) + await session.execute(statement) + # Delete all anonymous location annotations. + statement = delete(AnnotationLocationAnon).where( + AnnotationLocationAnon.session_id == self.session_id + ) + await session.execute(statement) + + async def migrate_record_type_annotations(self, session: AsyncSession) -> None: + # Copy all record type annotations from anonymous to user. 
+ statement = insert(AnnotationRecordTypeUser).from_select( + ['record_type', 'url_id', 'user_id'], + select( + AnnotationRecordTypeAnon.record_type, + AnnotationRecordTypeAnon.url_id, + self.user_id + ).where( + AnnotationRecordTypeAnon.session_id == self.session_id + ) + ) + await session.execute(statement) + # Delete all anonymous record type annotations. + statement = delete(AnnotationRecordTypeAnon).where( + AnnotationRecordTypeAnon.session_id == self.session_id + ) + await session.execute(statement) + + async def migrate_url_type_annotations(self, session: AsyncSession) -> None: + # Copy all url type annotations from anonymous to user. + statement = insert(AnnotationURLTypeUser).from_select( + ['type', 'url_id', 'user_id'], + select( + AnnotationURLTypeAnon.url_type, + AnnotationURLTypeAnon.url_id, + self.user_id + ).where( + AnnotationURLTypeAnon.session_id == self.session_id + ) + ) + await session.execute(statement) + # Delete all anonymous url type annotations. + statement = delete(AnnotationURLTypeAnon).where( + AnnotationURLTypeAnon.session_id == self.session_id + ) + await session.execute(statement) + + async def migrate_name_annotations(self, session: AsyncSession) -> None: + # Copy all name annotations from anonymous to user. + statement = insert(AnnotationNameUserEndorsement).from_select( + ['suggestion_id', 'user_id'], + select( + AnnotationNameAnonEndorsement.suggestion_id, + self.user_id + ).where( + AnnotationNameAnonEndorsement.session_id == self.session_id + ) + ) + await session.execute(statement) + # Delete all anonymous name annotations. 
+ statement = delete(AnnotationNameAnonEndorsement).where( + AnnotationNameAnonEndorsement.session_id == self.session_id + ) + await session.execute(statement) diff --git a/src/api/endpoints/annotate/routes.py b/src/api/endpoints/annotate/routes.py index ee3cc3c7..945de945 100644 --- a/src/api/endpoints/annotate/routes.py +++ b/src/api/endpoints/annotate/routes.py @@ -12,6 +12,8 @@ from src.api.endpoints.annotate.anonymous.get.query import GetNextURLForAnonymousAnnotationQueryBuilder from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse from src.api.endpoints.annotate.anonymous.post.query import AddAnonymousAnnotationsToURLQueryBuilder +from src.api.endpoints.annotate.migrate.query import MigrateAnonymousAnnotationsQueryBuilder +from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder from src.security.dtos.access_info import AccessInfo @@ -113,6 +115,23 @@ async def annotate_url_for_all_annotations_and_get_next_url( url_id=anno_url_id ) +@annotate_router.post('/migrate') +async def migrate_annotations_to_user( + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_standard_user_access_info), + session_id: UUID = Query(description="The session id of the anonymous user") +) -> MessageResponse: + """Migrate annotations from an anonymous session to a user's account.""" + await async_core.adb_client.run_query_builder( + MigrateAnonymousAnnotationsQueryBuilder( + session_id=session_id, + user_id=access_info.user_id + ) + ) + return MessageResponse( + message="Annotations migrated successfully." 
+ ) + @annotate_router.get("/suggestions/agencies/{url_id}") async def get_agency_suggestions( url_id: int, @@ -125,4 +144,5 @@ async def get_agency_suggestions( url_id=url_id, location_id=location_id ) - ) \ No newline at end of file + ) + diff --git a/src/api/endpoints/contributions/shared/contributions.py b/src/api/endpoints/contributions/shared/contributions.py index e62c0e7f..66d7b0be 100644 --- a/src/api/endpoints/contributions/shared/contributions.py +++ b/src/api/endpoints/contributions/shared/contributions.py @@ -1,6 +1,6 @@ from sqlalchemy import select, func, CTE, Column -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser class ContributionsCTEContainer: @@ -8,11 +8,11 @@ class ContributionsCTEContainer: def __init__(self): self._cte = ( select( - AnnotationUserURLType.user_id, + AnnotationURLTypeUser.user_id, func.count().label("count") ) .group_by( - AnnotationUserURLType.user_id + AnnotationURLTypeUser.user_id ) .cte("contributions") ) diff --git a/src/api/endpoints/contributions/user/queries/agreement/record_type.py b/src/api/endpoints/contributions/user/queries/agreement/record_type.py index 278c4c60..b865cb52 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/record_type.py +++ b/src/api/endpoints/contributions/user/queries/agreement/record_type.py @@ -3,7 +3,7 @@ from src.api.endpoints.contributions.user.queries.annotated_and_validated import AnnotatedAndValidatedCTEContainer from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser def get_record_type_agreement_cte_container( @@ -16,8 +16,8 @@ def 
get_record_type_agreement_cte_container( func.count() ) .join( - AnnotationUserRecordType, - AnnotationUserRecordType.url_id == inner_cte.url_id + AnnotationRecordTypeUser, + AnnotationRecordTypeUser.url_id == inner_cte.url_id ) .group_by( inner_cte.user_id @@ -31,14 +31,14 @@ def get_record_type_agreement_cte_container( func.count() ) .join( - AnnotationUserRecordType, - AnnotationUserRecordType.url_id == inner_cte.url_id + AnnotationRecordTypeUser, + AnnotationRecordTypeUser.url_id == inner_cte.url_id ) .join( URLRecordType, and_( URLRecordType.url_id == inner_cte.url_id, - URLRecordType.record_type == AnnotationUserRecordType.record_type + URLRecordType.record_type == AnnotationRecordTypeUser.record_type ) ) .group_by( diff --git a/src/api/endpoints/contributions/user/queries/agreement/url_type.py b/src/api/endpoints/contributions/user/queries/agreement/url_type.py index 57a2a5a1..a0ffc2e0 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/url_type.py +++ b/src/api/endpoints/contributions/user/queries/agreement/url_type.py @@ -3,7 +3,7 @@ from src.api.endpoints.contributions.user.queries.annotated_and_validated import AnnotatedAndValidatedCTEContainer from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser def get_url_type_agreement_cte_container( @@ -17,8 +17,8 @@ def get_url_type_agreement_cte_container( func.count() ) .join( - AnnotationUserURLType, - AnnotationUserURLType.url_id == inner_cte.url_id + AnnotationURLTypeUser, + AnnotationURLTypeUser.url_id == inner_cte.url_id ) .join( FlagURLValidated, @@ -36,14 +36,14 @@ def get_url_type_agreement_cte_container( func.count() ) .join( - AnnotationUserURLType, - AnnotationUserURLType.url_id == 
inner_cte.url_id + AnnotationURLTypeUser, + AnnotationURLTypeUser.url_id == inner_cte.url_id ) .join( FlagURLValidated, and_( FlagURLValidated.url_id == inner_cte.url_id, - AnnotationUserURLType.type == FlagURLValidated.type + AnnotationURLTypeUser.type == FlagURLValidated.type ) ) diff --git a/src/api/endpoints/contributions/user/queries/annotated_and_validated.py b/src/api/endpoints/contributions/user/queries/annotated_and_validated.py index 1be14e28..b617449e 100644 --- a/src/api/endpoints/contributions/user/queries/annotated_and_validated.py +++ b/src/api/endpoints/contributions/user/queries/annotated_and_validated.py @@ -1,7 +1,7 @@ from sqlalchemy import select, Column, CTE from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser class AnnotatedAndValidatedCTEContainer: @@ -9,16 +9,16 @@ class AnnotatedAndValidatedCTEContainer: def __init__(self, user_id: int | None): self._cte = ( select( - AnnotationUserURLType.user_id, - AnnotationUserURLType.url_id + AnnotationURLTypeUser.user_id, + AnnotationURLTypeUser.url_id ) .join( FlagURLValidated, - FlagURLValidated.url_id == AnnotationUserURLType.url_id + FlagURLValidated.url_id == AnnotationURLTypeUser.url_id ) ) if user_id is not None: - self._cte = self._cte.where(AnnotationUserURLType.user_id == user_id) + self._cte = self._cte.where(AnnotationURLTypeUser.user_id == user_id) self._cte = self._cte.cte("annotated_and_validated") @property diff --git a/src/api/endpoints/metrics/urls/breakdown/query/core.py b/src/api/endpoints/metrics/urls/breakdown/query/core.py index 949c8abd..c214b169 100644 --- a/src/api/endpoints/metrics/urls/breakdown/query/core.py +++ b/src/api/endpoints/metrics/urls/breakdown/query/core.py @@ -7,8 +7,8 @@ from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser 
from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.queries.base.builder import QueryBuilderBase @@ -19,18 +19,18 @@ async def run(self, session: AsyncSession) -> GetMetricsURLsBreakdownPendingResp flags = ( select( URL.id.label("url_id"), - case((AnnotationUserRecordType.url_id != None, literal(True)), else_=literal(False)).label( + case((AnnotationRecordTypeUser.url_id != None, literal(True)), else_=literal(False)).label( "has_user_record_type_annotation" ), - case((AnnotationUserURLType.url_id != None, literal(True)), else_=literal(False)).label( + case((AnnotationURLTypeUser.url_id != None, literal(True)), else_=literal(False)).label( "has_user_relevant_annotation" ), case((AnnotationAgencyUser.url_id != None, literal(True)), else_=literal(False)).label( "has_user_agency_annotation" ), ) - .outerjoin(AnnotationUserRecordType, URL.id == AnnotationUserRecordType.url_id) - .outerjoin(AnnotationUserURLType, URL.id == AnnotationUserURLType.url_id) + .outerjoin(AnnotationRecordTypeUser, URL.id == AnnotationRecordTypeUser.url_id) + .outerjoin(AnnotationURLTypeUser, URL.id == AnnotationURLTypeUser.url_id) .outerjoin(AnnotationAgencyUser, URL.id == AnnotationAgencyUser.url_id) ).cte("flags") diff --git a/src/api/endpoints/submit/data_source/queries/core.py b/src/api/endpoints/submit/data_source/queries/core.py index 17233386..77c33dca 100644 --- a/src/api/endpoints/submit/data_source/queries/core.py +++ b/src/api/endpoints/submit/data_source/queries/core.py @@ -12,8 +12,8 @@ from 
src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType -from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL @@ -75,7 +75,7 @@ async def run( session_id: uuid.UUID = await MakeAnonymousSessionQueryBuilder().run(session=session) # Add URL Type Suggestion - url_type_suggestion = AnnotationAnonURLType( + url_type_suggestion = AnnotationURLTypeAnon( url_id=url_id, url_type=URLType.DATA_SOURCE, session_id=session_id @@ -84,7 +84,7 @@ async def run( # Optionally add Record Type as suggestion if self.request.record_type is not None: - record_type_suggestion = AnnotationAnonRecordType( + record_type_suggestion = AnnotationRecordTypeAnon( url_id=url_id, record_type=self.request.record_type.value, session_id=session_id diff --git a/src/api/endpoints/submit/url/queries/core.py b/src/api/endpoints/submit/url/queries/core.py index ccbbc2c4..54ab5439 100644 --- a/src/api/endpoints/submit/url/queries/core.py +++ b/src/api/endpoints/submit/url/queries/core.py @@ -12,11 +12,11 @@ from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from 
src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.queries.base.builder import QueryBuilderBase from src.util.models.url_and_scheme import URLAndScheme from src.util.url import clean_url, get_url_and_scheme, is_valid_url @@ -77,7 +77,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add record type as suggestion if exists if self.request.record_type is not None: - rec_sugg = AnnotationUserRecordType( + rec_sugg = AnnotationRecordTypeUser( user_id=self.user_id, url_id=url_insert.id, record_type=self.request.record_type.value @@ -94,7 +94,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: session.add(name_sugg) await session.flush() - link_name_sugg = LinkUserNameSuggestion( + link_name_sugg = AnnotationNameUserEndorsement( suggestion_id=name_sugg.id, user_id=self.user_id ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py index cec89ef2..606105d0 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py @@ -3,7 +3,7 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.annotation.name.anon.sqlalchemy import 
AnnotationNameAnonEndorsement -from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( @@ -13,8 +13,8 @@ func.count().label("votes") ) .join( - LinkUserNameSuggestion, - LinkUserNameSuggestion.suggestion_id == AnnotationNameSuggestion.id + AnnotationNameUserEndorsement, + AnnotationNameUserEndorsement.suggestion_id == AnnotationNameSuggestion.id ) .group_by( AnnotationNameSuggestion.url_id, diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py index efc92455..19455587 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py @@ -2,31 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - AnnotationUserRecordType.url_id, - AnnotationUserRecordType.record_type.label("entity"), + AnnotationRecordTypeUser.url_id, + AnnotationRecordTypeUser.record_type.label("entity"), func.count().label("votes") ) .group_by( - AnnotationUserRecordType.url_id, - AnnotationUserRecordType.record_type + AnnotationRecordTypeUser.url_id, + 
AnnotationRecordTypeUser.record_type ) ) _anon_counts = ( select( - AnnotationAnonRecordType.url_id, - AnnotationAnonRecordType.record_type.label("entity"), + AnnotationRecordTypeAnon.url_id, + AnnotationRecordTypeAnon.record_type.label("entity"), (func.count() * ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnnotationAnonRecordType.url_id, - AnnotationAnonRecordType.record_type + AnnotationRecordTypeAnon.url_id, + AnnotationRecordTypeAnon.record_type ) ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py index 6c87e69b..bd92fdb8 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py @@ -2,31 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - AnnotationUserURLType.url_id, - AnnotationUserURLType.type.label("entity"), + AnnotationURLTypeUser.url_id, + AnnotationURLTypeUser.type.label("entity"), func.count().label("votes") ) .group_by( - AnnotationUserURLType.url_id, - AnnotationUserURLType.type + AnnotationURLTypeUser.url_id, + AnnotationURLTypeUser.type ) ) _anon_counts = ( select( - AnnotationAnonURLType.url_id, - AnnotationAnonURLType.url_type.label("entity"), + AnnotationURLTypeAnon.url_id, + 
AnnotationURLTypeAnon.url_type.label("entity"), (func.count() / ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnnotationAnonURLType.url_id, - AnnotationAnonURLType.url_type + AnnotationURLTypeAnon.url_id, + AnnotationURLTypeAnon.url_type ) ) diff --git a/src/db/client/async_.py b/src/db/client/async_.py index c780f9d1..89187f11 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -77,9 +77,9 @@ from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.models.templates_.base import Base @@ -263,7 +263,7 @@ async def add_user_relevant_suggestion( ): prior_suggestion = await self.get_user_suggestion( session, - model=AnnotationUserURLType, + model=AnnotationURLTypeUser, user_id=user_id, url_id=url_id ) @@ -271,7 +271,7 @@ async def add_user_relevant_suggestion( prior_suggestion.type = suggested_status.value return - suggestion = AnnotationUserURLType( + suggestion = AnnotationURLTypeUser( url_id=url_id, user_id=user_id, type=suggested_status.value @@ -304,7 +304,7 @@ async def add_user_record_type_suggestion( ): prior_suggestion = await self.get_user_suggestion( session, - model=AnnotationUserRecordType, + model=AnnotationRecordTypeUser, 
user_id=user_id, url_id=url_id ) @@ -312,7 +312,7 @@ async def add_user_record_type_suggestion( prior_suggestion.record_type = record_type.value return - suggestion = AnnotationUserRecordType( + suggestion = AnnotationRecordTypeUser( url_id=url_id, user_id=user_id, record_type=record_type.value diff --git a/src/db/client/types.py b/src/db/client/types.py index 8e3bff0d..0f132aeb 100644 --- a/src/db/client/types.py +++ b/src/db/client/types.py @@ -1,5 +1,5 @@ from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser -UserSuggestionModel = AnnotationUserURLType or AnnotationUserRecordType or AnnotationAgencyUser +UserSuggestionModel = AnnotationURLTypeUser or AnnotationRecordTypeUser or AnnotationAgencyUser diff --git a/src/db/constants.py b/src/db/constants.py index 87fd1f19..cf073fb7 100644 --- a/src/db/constants.py +++ b/src/db/constants.py @@ -1,13 +1,13 @@ from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser PLACEHOLDER_AGENCY_NAME = "PLACEHOLDER_AGENCY_NAME" STANDARD_ROW_LIMIT = 100 USER_ANNOTATION_MODELS = [ - AnnotationUserURLType, - AnnotationUserRecordType, + AnnotationURLTypeUser, + AnnotationRecordTypeUser, AnnotationAgencyUser ] \ No newline at end of file diff --git 
a/src/db/dto_converter.py b/src/db/dto_converter.py index 4eb5a4cd..eec2f32c 100644 --- a/src/db/dto_converter.py +++ b/src/db/dto_converter.py @@ -11,9 +11,9 @@ from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser class DTOConverter: @@ -24,7 +24,7 @@ class DTOConverter: @staticmethod def final_review_annotation_relevant_info( - user_suggestions: list[AnnotationUserURLType], + user_suggestions: list[AnnotationURLTypeUser], auto_suggestion: AnnotationAutoURLType ) -> FinalReviewAnnotationRelevantInfo: @@ -44,7 +44,7 @@ def final_review_annotation_relevant_info( @staticmethod def final_review_annotation_record_type_info( - user_suggestions: list[AnnotationUserRecordType], + user_suggestions: list[AnnotationRecordTypeUser], auto_suggestion: AnnotationAutoRecordType ): diff --git a/src/db/models/impl/annotation/name/user/pydantic.py b/src/db/models/impl/annotation/name/user/pydantic.py index 95fe0150..fb662bcd 100644 --- a/src/db/models/impl/annotation/name/user/pydantic.py +++ b/src/db/models/impl/annotation/name/user/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -8,5 +8,5 @@ class 
LinkUserNameSuggestionPydantic(BulkInsertableModel): user_id: int @classmethod - def sa_model(cls) -> type[LinkUserNameSuggestion]: - return LinkUserNameSuggestion \ No newline at end of file + def sa_model(cls) -> type[AnnotationNameUserEndorsement]: + return AnnotationNameUserEndorsement \ No newline at end of file diff --git a/src/db/models/impl/annotation/name/user/sqlalchemy.py b/src/db/models/impl/annotation/name/user/sqlalchemy.py index cf23dd6f..e456d026 100644 --- a/src/db/models/impl/annotation/name/user/sqlalchemy.py +++ b/src/db/models/impl/annotation/name/user/sqlalchemy.py @@ -4,7 +4,7 @@ from src.db.models.templates_.base import Base -class LinkUserNameSuggestion( +class AnnotationNameUserEndorsement( Base, CreatedAtMixin, ): diff --git a/src/db/models/impl/annotation/record_type/anon/sqlalchemy.py b/src/db/models/impl/annotation/record_type/anon/sqlalchemy.py index 304ab1be..7f7ac028 100644 --- a/src/db/models/impl/annotation/record_type/anon/sqlalchemy.py +++ b/src/db/models/impl/annotation/record_type/anon/sqlalchemy.py @@ -7,7 +7,7 @@ from src.db.models.templates_.base import Base -class AnnotationAnonRecordType( +class AnnotationRecordTypeAnon( Base, URLDependentMixin, CreatedAtMixin, diff --git a/src/db/models/impl/annotation/record_type/user/user.py b/src/db/models/impl/annotation/record_type/user/user.py index 689d985b..c4a84b72 100644 --- a/src/db/models/impl/annotation/record_type/user/user.py +++ b/src/db/models/impl/annotation/record_type/user/user.py @@ -8,7 +8,7 @@ from src.db.models.types import record_type_values -class AnnotationUserRecordType( +class AnnotationRecordTypeUser( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, diff --git a/src/db/models/impl/annotation/url_type/anon/sqlalchemy.py b/src/db/models/impl/annotation/url_type/anon/sqlalchemy.py index a1de1826..e8a8db18 100644 --- a/src/db/models/impl/annotation/url_type/anon/sqlalchemy.py +++ b/src/db/models/impl/annotation/url_type/anon/sqlalchemy.py @@ -7,7 +7,7 @@ from 
src.db.models.templates_.base import Base -class AnnotationAnonURLType( +class AnnotationURLTypeAnon( Base, URLDependentMixin, CreatedAtMixin, diff --git a/src/db/models/impl/annotation/url_type/user/sqlalchemy.py b/src/db/models/impl/annotation/url_type/user/sqlalchemy.py index af84a758..8f02a65d 100644 --- a/src/db/models/impl/annotation/url_type/user/sqlalchemy.py +++ b/src/db/models/impl/annotation/url_type/user/sqlalchemy.py @@ -9,7 +9,7 @@ from src.db.models.templates_.with_id import WithIDBase -class AnnotationUserURLType( +class AnnotationURLTypeUser( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index dd52c1e1..a4295ae3 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -15,9 +15,9 @@ from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.mixins import UpdatedAtMixin, CreatedAtMixin from src.db.models.templates_.with_id import WithIDBase @@ -118,12 +118,12 @@ def full_url(cls): auto_record_type_suggestion = relationship( AnnotationAutoRecordType, uselist=False, back_populates="url") user_record_type_suggestions = relationship( - AnnotationUserRecordType, back_populates="url") + 
AnnotationRecordTypeUser, back_populates="url") # Relvant/URL Type auto_relevant_suggestion = relationship( AnnotationAutoURLType, uselist=False, back_populates="url") user_relevant_suggestions = relationship( - AnnotationUserURLType, back_populates="url") + AnnotationURLTypeUser, back_populates="url") reviewing_user = relationship( "ReviewingUserURL", uselist=False, back_populates="url") diff --git a/src/db/queries/implementations/core/common/annotation_exists_/constants.py b/src/db/queries/implementations/core/common/annotation_exists_/constants.py index 5851b42b..dbdfaa1b 100644 --- a/src/db/queries/implementations/core/common/annotation_exists_/constants.py +++ b/src/db/queries/implementations/core/common/annotation_exists_/constants.py @@ -1,15 +1,15 @@ from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser ALL_ANNOTATION_MODELS = [ AnnotationAutoRecordType, AnnotationAutoURLType, AnnotationAgencyAutoSubtask, - AnnotationUserURLType, - AnnotationUserRecordType, + AnnotationURLTypeUser, + AnnotationRecordTypeUser, AnnotationAgencyUser ] diff --git a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py index be8a76f9..c224fa40 100644 --- a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py 
+++ b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py @@ -7,8 +7,8 @@ from src.collectors.enums import URLStatus from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.common.annotation_exists_.core import AnnotationExistsCTEQueryBuilder @@ -17,11 +17,11 @@ class PendingAnnotationExistsCTEQueryBuilder(AnnotationExistsCTEQueryBuilder): @property def has_user_relevant_annotation(self): - return self.get_exists_for_model(AnnotationUserURLType) + return self.get_exists_for_model(AnnotationURLTypeUser) @property def has_user_record_type_annotation(self): - return self.get_exists_for_model(AnnotationUserRecordType) + return self.get_exists_for_model(AnnotationRecordTypeUser) @property def has_user_agency_annotation(self): diff --git a/src/db/types.py b/src/db/types.py index 0ff28637..df065cab 100644 --- a/src/db/types.py +++ b/src/db/types.py @@ -1,10 +1,10 @@ from typing import TypeVar from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.queries.base.labels import 
LabelsBase -UserSuggestionType = AnnotationAgencyUser | AnnotationUserURLType | AnnotationUserRecordType +UserSuggestionType = AnnotationAgencyUser | AnnotationURLTypeUser | AnnotationRecordTypeUser LabelsType = TypeVar("LabelsType", bound=LabelsBase) \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index faed2220..0ed29def 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -12,9 +12,9 @@ from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @@ -106,7 +106,7 @@ async def test_annotate_all( # Check that all annotations are present in the database # Check URL Type Suggestions - all_relevance_suggestions: list[AnnotationUserURLType] = await adb_client.get_all(AnnotationUserURLType) + all_relevance_suggestions: list[AnnotationURLTypeUser] = await adb_client.get_all(AnnotationURLTypeUser) assert len(all_relevance_suggestions) == 4 suggested_types: 
set[URLType] = {sugg.type for sugg in all_relevance_suggestions} assert suggested_types == {URLType.DATA_SOURCE, URLType.NOT_RELEVANT} @@ -118,7 +118,7 @@ async def test_annotate_all( assert agency_id in suggested_agency_ids # Should be one record type - all_record_type_suggestions = await adb_client.get_all(AnnotationUserRecordType) + all_record_type_suggestions = await adb_client.get_all(AnnotationRecordTypeUser) assert len(all_record_type_suggestions) == 3 suggested_record_types: set[RecordType] = { sugg.record_type for sugg in all_record_type_suggestions @@ -172,6 +172,6 @@ async def test_annotate_all( assert "New Name" in suggested_names # Confirm 2 link user name suggestions - link_user_name_suggestions: list[LinkUserNameSuggestion] = await adb_client.get_all(LinkUserNameSuggestion) + link_user_name_suggestions: list[AnnotationNameUserEndorsement] = await adb_client.get_all(AnnotationNameUserEndorsement) assert len(link_user_name_suggestions) == 2 diff --git a/tests/automated/integration/api/annotate/anonymous/test_core.py b/tests/automated/integration/api/annotate/anonymous/test_core.py index 48fb4f4d..e977accb 100644 --- a/tests/automated/integration/api/annotate/anonymous/test_core.py +++ b/tests/automated/integration/api/annotate/anonymous/test_core.py @@ -8,18 +8,25 @@ from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse +from src.api.shared.models.message_response import MessageResponse from src.core.enums import RecordType from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.annotation.location.anon.sqlalchemy import 
AnnotationLocationAnon +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType -from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.models.mixins import URLDependentMixin from tests.automated.integration.api.annotate.anonymous.helper import get_next_url_for_anonymous_annotation, \ post_and_get_next_url_for_anonymous_annotation +from tests.automated.integration.conftest import MOCK_USER_ID from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review from tests.helpers.setup.final_review.model import FinalReviewSetupInfo @@ -83,8 +90,8 @@ async def test_annotate_anonymous( for model in [ AnnotationAgencyAnon, AnnotationLocationAnon, - AnnotationAnonRecordType, - AnnotationAnonURLType + AnnotationRecordTypeAnon, + AnnotationURLTypeAnon ]: instances: list[URLDependentMixin] = await ddc.adb_client.get_all(model) assert len(instances) == 1 @@ -113,3 +120,52 @@ async def test_annotate_anonymous( assert get_response_3.next_annotation is not None assert 
get_response_3.next_annotation.url_info.url_id == post_response_1.next_annotation.url_info.url_id + ### TEST MIGRATION ### + # Call the migration endpoint with a user ID, and confirm all anonymous annotations have transferred to the user. + response: MessageResponse = rv.post_v3( + f'/annotate/migrate?session_id={session_id}', + expected_model=MessageResponse, + ) + assert response.message == 'Annotations migrated successfully.' + + # Check all annotations + + # URL Types + url_types: list[AnnotationURLTypeUser] = await ddc.adb_client.get_all(AnnotationURLTypeUser) + assert len(url_types) == 3 + annotation_url_type: AnnotationURLTypeUser = url_types[-1] + assert annotation_url_type.user_id == MOCK_USER_ID + assert annotation_url_type.url_id == get_response_1.next_annotation.url_info.url_id + assert annotation_url_type.type == URLType.DATA_SOURCE + + # Locations + locations: list[AnnotationLocationUser] = await ddc.adb_client.get_all(AnnotationLocationUser) + assert len(locations) == 1 + annotation_location: AnnotationLocationUser = locations[0] + assert annotation_location.user_id == MOCK_USER_ID + assert annotation_location.url_id == get_response_1.next_annotation.url_info.url_id + assert annotation_location.location_id == pennsylvania.location_id + + # Agencies + agencies: list[AnnotationAgencyUser] = await ddc.adb_client.get_all(AnnotationAgencyUser) + assert len(agencies) == 3 + annotation_agency: AnnotationAgencyUser = agencies[-1] + assert annotation_agency.user_id == MOCK_USER_ID + assert annotation_agency.url_id == get_response_1.next_annotation.url_info.url_id + assert annotation_agency.agency_id == agency_id + + # Record Types + record_types: list[AnnotationRecordTypeUser] = await ddc.adb_client.get_all(AnnotationRecordTypeUser) + assert len(record_types) == 3 + annotation_record_type: AnnotationRecordTypeUser = record_types[-1] + assert annotation_record_type.user_id == MOCK_USER_ID + assert annotation_record_type.url_id == 
get_response_1.next_annotation.url_info.url_id + assert annotation_record_type.record_type == RecordType.ACCIDENT_REPORTS.value + + # Name Suggestions + name_suggestions: list[AnnotationNameUserEndorsement] = await ddc.adb_client.get_all(AnnotationNameUserEndorsement) + assert len(name_suggestions) == 1 + annotation_name: AnnotationNameUserEndorsement = name_suggestions[0] + assert annotation_name.user_id == MOCK_USER_ID + + diff --git a/tests/automated/integration/api/submit/data_source/test_core.py b/tests/automated/integration/api/submit/data_source/test_core.py index 5a8fb103..51bbc93b 100644 --- a/tests/automated/integration/api/submit/data_source/test_core.py +++ b/tests/automated/integration/api/submit/data_source/test_core.py @@ -10,8 +10,8 @@ from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType -from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL @@ -96,7 +96,7 @@ async def test_submit_data_source( assert batch_url_link.url_id == url.id # Check for anonymous annotations - url_type_suggestion: AnnotationAnonURLType = await adb_client.one_or_none_model(AnnotationAnonURLType) + url_type_suggestion: AnnotationURLTypeAnon = await adb_client.one_or_none_model(AnnotationURLTypeAnon) assert url_type_suggestion is not None assert 
url_type_suggestion.url_id == url.id assert url_type_suggestion.url_type == URLType.DATA_SOURCE @@ -120,7 +120,7 @@ async def test_submit_data_source( assert name_suggestion.suggestion == "Example name" # Check for Record Type Suggestion - record_type_suggestion: AnnotationAnonRecordType = await adb_client.one_or_none_model(AnnotationAnonRecordType) + record_type_suggestion: AnnotationRecordTypeAnon = await adb_client.one_or_none_model(AnnotationRecordTypeAnon) assert record_type_suggestion.record_type == RecordType.COMPLAINTS_AND_MISCONDUCT assert record_type_suggestion.session_id == session_id diff --git a/tests/automated/integration/api/submit/test_url_maximal.py b/tests/automated/integration/api/submit/test_url_maximal.py index 27fbfe2f..1d458c98 100644 --- a/tests/automated/integration/api/submit/test_url_maximal.py +++ b/tests/automated/integration/api/submit/test_url_maximal.py @@ -9,10 +9,10 @@ from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -75,13 +75,13 @@ async def test_maximal( assert name_sugg.suggestion == 
"Example URL" assert name_sugg.source == NameSuggestionSource.USER - name_link_suggs: list[LinkUserNameSuggestion] = await adb_client.get_all(LinkUserNameSuggestion) + name_link_suggs: list[AnnotationNameUserEndorsement] = await adb_client.get_all(AnnotationNameUserEndorsement) assert len(name_link_suggs) == 1 - name_link_sugg: LinkUserNameSuggestion = name_link_suggs[0] + name_link_sugg: AnnotationNameUserEndorsement = name_link_suggs[0] assert name_link_sugg.suggestion_id == name_sugg.id - rec_suggs: list[AnnotationUserRecordType] = await adb_client.get_all(AnnotationUserRecordType) + rec_suggs: list[AnnotationRecordTypeUser] = await adb_client.get_all(AnnotationRecordTypeUser) assert len(rec_suggs) == 1 - rec_sugg: AnnotationUserRecordType = rec_suggs[0] + rec_sugg: AnnotationRecordTypeUser = rec_suggs[0] assert rec_sugg.url_id == url_id assert rec_sugg.record_type == RecordType.INCARCERATION_RECORDS.value diff --git a/tests/automated/integration/api/url/by_id/delete/test_any_url.py b/tests/automated/integration/api/url/by_id/delete/test_any_url.py index 49f7a407..2711c103 100644 --- a/tests/automated/integration/api/url/by_id/delete/test_any_url.py +++ b/tests/automated/integration/api/url/by_id/delete/test_any_url.py @@ -19,8 +19,8 @@ from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType -from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.change_log import ChangeLog from 
src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL @@ -29,7 +29,7 @@ from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL -from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL @@ -41,9 +41,9 @@ from src.db.models.impl.url.internet_archives.save.sqlalchemy import URLInternetArchivesSaveMetadata from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder @@ -141,9 +141,9 @@ async def _check_results( ### Agency AnnotationAgencyUser, ### Record Type - 
AnnotationUserRecordType, + AnnotationRecordTypeUser, ### URL Type - AnnotationUserURLType, + AnnotationURLTypeUser, ### Location AnnotationLocationUser, AnnotationNameSuggestion, @@ -153,9 +153,9 @@ async def _check_results( ### Location AnnotationLocationAnon, ### Record Type - AnnotationAnonRecordType, + AnnotationRecordTypeAnon, ### URL Type - AnnotationAnonURLType, + AnnotationURLTypeAnon, ] for model in models: assert await dbc.get_all(model) == [] @@ -380,7 +380,7 @@ async def _setup( ) ### Record Type await dbc.add( - AnnotationUserRecordType( + AnnotationRecordTypeUser( url_id=url.url_id, user_id=1, record_type=RecordType.BOOKING_REPORTS.value, @@ -388,7 +388,7 @@ async def _setup( ) ### URL Type await dbc.add( - AnnotationUserURLType( + AnnotationURLTypeUser( url_id=url.url_id, type=URLType.INDIVIDUAL_RECORD, user_id=1 @@ -412,7 +412,7 @@ async def _setup( return_id=True ) await dbc.add( - LinkUserNameSuggestion( + AnnotationNameUserEndorsement( suggestion_id=name_suggestion_id, user_id=1, ) @@ -429,13 +429,13 @@ async def _setup( session_id=session_id, ), ### Record Type - AnnotationAnonRecordType( + AnnotationRecordTypeAnon( url_id=url.url_id, record_type=RecordType.BOOKING_REPORTS.value, session_id=session_id, ), ### URL Type - AnnotationAnonURLType( + AnnotationURLTypeAnon( url_id=url.url_id, url_type=URLType.INDIVIDUAL_RECORD, session_id=session_id, diff --git a/tests/automated/integration/readonly/setup/annotations.py b/tests/automated/integration/readonly/setup/annotations.py index ab2ef13e..9e701f62 100644 --- a/tests/automated/integration/readonly/setup/annotations.py +++ b/tests/automated/integration/readonly/setup/annotations.py @@ -5,9 +5,9 @@ from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.annotation.name.user.sqlalchemy 
import LinkUserNameSuggestion -from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser async def add_full_data_sources_annotations( @@ -26,17 +26,17 @@ async def add_full_data_sources_annotations( name_suggestion, return_id=True ) - url_type_suggestion = AnnotationUserURLType( + url_type_suggestion = AnnotationURLTypeUser( url_id=url_id, user_id=user_id, type=URLType.DATA_SOURCE ) - record_type_suggestion = AnnotationUserRecordType( + record_type_suggestion = AnnotationRecordTypeUser( user_id=user_id, url_id=url_id, record_type=RecordType.RECORDS_REQUEST_INFO.value ) - user_name_suggestion = LinkUserNameSuggestion( + user_name_suggestion = AnnotationNameUserEndorsement( user_id=user_id, suggestion_id=name_suggestion_id, ) @@ -64,7 +64,7 @@ async def add_minimal_not_relevant_annotation( user_id: int, adb_client: AsyncDatabaseClient ) -> None: - url_type_suggestion = AnnotationUserURLType( + url_type_suggestion = AnnotationURLTypeUser( url_id=url_id, user_id=user_id, type=URLType.NOT_RELEVANT diff --git a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py index d99e4448..95d636c2 100644 --- a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py +++ b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py @@ -14,8 +14,8 @@ from src.core.tasks.url.operators.validate.core import AutoValidateURLTaskOperator from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon from src.db.models.impl.annotation.location.anon.sqlalchemy 
import AnnotationLocationAnon -from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationAnonRecordType -from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationAnonURLType +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from tests.automated.integration.tasks.url.impl.validate.helper import TestValidateTaskHelper, DEFAULT_RECORD_TYPE @@ -55,12 +55,12 @@ async def test_data_source( session_id_2: UUID = await helper.get_anonymous_session_id() for session_id in [session_id_1, session_id_2]: - anon_url_type = AnnotationAnonURLType( + anon_url_type = AnnotationURLTypeAnon( url_type=URLType.DATA_SOURCE, session_id=session_id, url_id=helper.url_id ) - anon_record_type = AnnotationAnonRecordType( + anon_record_type = AnnotationRecordTypeAnon( record_type=DEFAULT_RECORD_TYPE, session_id=session_id, url_id=helper.url_id @@ -101,7 +101,7 @@ async def test_data_source( # Add tiebreaker -- a single anonymous vote session_id_3: UUID = await helper.get_anonymous_session_id() - anon_record_type = AnnotationAnonRecordType( + anon_record_type = AnnotationRecordTypeAnon( record_type=DEFAULT_RECORD_TYPE, session_id=session_id_3, url_id=helper.url_id diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index d3252202..d3f6c924 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -26,7 +26,7 @@ from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL -from src.db.models.impl.annotation.name.user.sqlalchemy import 
LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.core.enums import URLSource @@ -709,7 +709,7 @@ async def user_name_endorsement( suggestion_id: int, user_id: int, ): - link = LinkUserNameSuggestion( + link = AnnotationNameUserEndorsement( suggestion_id=suggestion_id, user_id=user_id, ) From 814e3b817d2094af0b36f0c1eedf0373b257b472 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Sat, 20 Dec 2025 17:55:50 -0500 Subject: [PATCH 11/24] Add logic for handling conflicts --- src/api/endpoints/annotate/migrate/query.py | 23 +++++++++++---------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/api/endpoints/annotate/migrate/query.py b/src/api/endpoints/annotate/migrate/query.py index 327f43ce..0fe51b68 100644 --- a/src/api/endpoints/annotate/migrate/query.py +++ b/src/api/endpoints/annotate/migrate/query.py @@ -1,7 +1,8 @@ from typing import Any from uuid import UUID -from sqlalchemy import insert, select, delete +from sqlalchemy import select, delete +from sqlalchemy.dialects.postgresql import insert from sqlalchemy.ext.asyncio import AsyncSession from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon @@ -45,8 +46,8 @@ async def migrate_agency_annotations(self, session: AsyncSession) -> None: self.user_id ).where( AnnotationAgencyAnon.session_id == self.session_id - ) - ) + ).distinct() + ).on_conflict_do_nothing(index_elements=["agency_id", "url_id", "user_id"]) await session.execute(statement) # Delete all anonymous agency annotations. 
statement = delete(AnnotationAgencyAnon).where( @@ -65,8 +66,8 @@ async def migrate_location_annotations(self, session: AsyncSession) -> None: self.user_id ).where( AnnotationLocationAnon.session_id == self.session_id - ) - ) + ).distinct() + ).on_conflict_do_nothing(index_elements=["location_id", "url_id", "user_id"]) await session.execute(statement) # Delete all anonymous location annotations. statement = delete(AnnotationLocationAnon).where( @@ -84,8 +85,8 @@ async def migrate_record_type_annotations(self, session: AsyncSession) -> None: self.user_id ).where( AnnotationRecordTypeAnon.session_id == self.session_id - ) - ) + ).distinct() + ).on_conflict_do_nothing(index_elements=["url_id", "user_id"]) await session.execute(statement) # Delete all anonymous record type annotations. statement = delete(AnnotationRecordTypeAnon).where( @@ -103,8 +104,8 @@ async def migrate_url_type_annotations(self, session: AsyncSession) -> None: self.user_id ).where( AnnotationURLTypeAnon.session_id == self.session_id - ) - ) + ).distinct() + ).on_conflict_do_nothing(index_elements=["url_id", "user_id"]) await session.execute(statement) # Delete all anonymous url type annotations. statement = delete(AnnotationURLTypeAnon).where( @@ -121,8 +122,8 @@ async def migrate_name_annotations(self, session: AsyncSession) -> None: self.user_id ).where( AnnotationNameAnonEndorsement.session_id == self.session_id - ) - ) + ).distinct() + ).on_conflict_do_nothing(index_elements=["suggestion_id", "user_id"]) await session.execute(statement) # Delete all anonymous name annotations. 
statement = delete(AnnotationNameAnonEndorsement).where( From a9486b5e06d1692859cab019647dd6a4ea6b3183 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Sun, 21 Dec 2025 19:54:54 -0500 Subject: [PATCH 12/24] Add logic for including anonymous annotation counts --- src/api/endpoints/annotate/_shared/extract.py | 11 +- .../annotate/_shared/queries/helper.py | 5 +- .../all/get/queries/agency/requester.py | 34 ++++- .../annotate/all/get/queries/convert.py | 24 +++- .../all/get/queries/location_/core.py | 1 - .../all/get/queries/location_/requester.py | 34 ++++- .../annotate/all/get/queries/name/core.py | 17 ++- src/api/endpoints/pending/__init__.py | 0 .../endpoints/pending/agencies/__init__.py | 0 .../pending/agencies/approve/__init__.py | 0 .../pending/agencies/get/__init__.py | 0 src/api/endpoints/pending/routes.py | 0 src/db/models/impl/agency/sqlalchemy.py | 2 +- .../annotation/url_type/auto/sqlalchemy.py | 2 +- .../annotation/url_type/user/sqlalchemy.py | 2 +- src/db/models/impl/url/core/sqlalchemy.py | 20 ++- .../api/annotate/all/test_anon_count.py | 124 ++++++++++++++++++ .../api/annotate/all/test_happy_path.py | 2 +- .../integration/api/pending/__init__.py | 0 .../integration/api/pending/test_agencies.py | 16 +++ 20 files changed, 269 insertions(+), 25 deletions(-) create mode 100644 src/api/endpoints/pending/__init__.py create mode 100644 src/api/endpoints/pending/agencies/__init__.py create mode 100644 src/api/endpoints/pending/agencies/approve/__init__.py create mode 100644 src/api/endpoints/pending/agencies/get/__init__.py create mode 100644 src/api/endpoints/pending/routes.py create mode 100644 tests/automated/integration/api/annotate/all/test_anon_count.py create mode 100644 tests/automated/integration/api/pending/__init__.py create mode 100644 tests/automated/integration/api/pending/test_agencies.py diff --git a/src/api/endpoints/annotate/_shared/extract.py b/src/api/endpoints/annotate/_shared/extract.py index 12368cd6..4a7517eb 100644 --- 
a/src/api/endpoints/annotate/_shared/extract.py +++ b/src/api/endpoints/annotate/_shared/extract.py @@ -28,18 +28,25 @@ async def extract_and_format_get_annotation_result( html_response_info = DTOConverter.html_content_list_to_html_response_info( url.html_content ) + # URL Types url_type_suggestions: list[URLTypeAnnotationSuggestion] = \ convert_user_url_type_suggestion_to_url_type_annotation_suggestion( - url.user_relevant_suggestions + url.user_url_type_suggestions, + url.anon_url_type_suggestions ) + # Record Types record_type_suggestions: RecordTypeAnnotationResponseOuterInfo = \ convert_user_record_type_suggestion_to_record_type_annotation_suggestion( - url.user_record_type_suggestions + url.user_record_type_suggestions, + url.anon_record_type_suggestions ) + # Agencies agency_suggestions: AgencyAnnotationResponseOuterInfo = \ await GetAgencySuggestionsQueryBuilder(url_id=url.id).run(session) + # Locations location_suggestions: LocationAnnotationResponseOuterInfo = \ await GetLocationSuggestionsQueryBuilder(url_id=url.id).run(session) + # Names name_suggestions: NameAnnotationResponseOuterInfo = \ await GetNameSuggestionsQueryBuilder(url_id=url.id).run(session) return GetNextURLForAllAnnotationResponse( diff --git a/src/api/endpoints/annotate/_shared/queries/helper.py b/src/api/endpoints/annotate/_shared/queries/helper.py index 3f3745f5..f8bdf033 100644 --- a/src/api/endpoints/annotate/_shared/queries/helper.py +++ b/src/api/endpoints/annotate/_shared/queries/helper.py @@ -47,9 +47,10 @@ def conclude(query: Select) -> Select: # Add load options query.options( joinedload(URL.html_content), - joinedload(URL.user_relevant_suggestions), + joinedload(URL.user_url_type_suggestions), joinedload(URL.user_record_type_suggestions), - joinedload(URL.name_suggestions), + joinedload(URL.anon_record_type_suggestions), + joinedload(URL.anon_url_type_suggestions), ) # Sorting Priority .order_by( diff --git a/src/api/endpoints/annotate/all/get/queries/agency/requester.py 
b/src/api/endpoints/annotate/all/get/queries/agency/requester.py index 68d801b5..8c50b41d 100644 --- a/src/api/endpoints/annotate/all/get/queries/agency/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/agency/requester.py @@ -8,6 +8,7 @@ from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser @@ -40,6 +41,9 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: ), exists_url( AnnotationAgencyAutoSubtask + ), + exists_url( + AnnotationAgencyAnon ) ) ) @@ -60,6 +64,20 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: .cte("user_suggestions") ) + # Number of anon users who suggested each agency + anon_suggestions_cte = ( + select( + AnnotationAgencyAnon.url_id, + AnnotationAgencyAnon.agency_id, + func.count(AnnotationAgencyAnon.session_id).label('anon_count') + ) + .group_by( + AnnotationAgencyAnon.agency_id, + AnnotationAgencyAnon.url_id, + ) + .cte("anon_suggestions") + ) + # Maximum confidence of robo annotation, if any robo_suggestions_cte = ( select( @@ -88,6 +106,7 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: Agency.name.label("display_name"), func.coalesce(user_suggestions_cte.c.user_count, 0).label('user_count'), func.coalesce(robo_suggestions_cte.c.robo_confidence, 0).label('robo_confidence'), + func.coalesce(anon_suggestions_cte.c.anon_count, 0).label('anon_count'), ) .join( Agency, @@ -100,6 +119,13 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: user_suggestions_cte.c.agency_id == Agency.id ) ) + 
.outerjoin( + anon_suggestions_cte, + and_( + anon_suggestions_cte.c.url_id == self.url_id, + anon_suggestions_cte.c.agency_id == Agency.id + ) + ) .outerjoin( robo_suggestions_cte, and_( @@ -110,7 +136,8 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: .where( or_( user_suggestions_cte.c.user_count > 0, - robo_suggestions_cte.c.robo_confidence > 0 + robo_suggestions_cte.c.robo_confidence > 0, + anon_suggestions_cte.c.anon_count > 0 ) ) ) @@ -119,7 +146,10 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: mappings: Sequence[RowMapping] = await self.mappings(joined_suggestions_query) suggestions: list[SuggestionModel] = [ SuggestionModel( - **mapping + id=mapping["id"], + display_name=mapping["display_name"], + user_count=mapping['user_count'] + (mapping['anon_count'] // 2), + robo_confidence=mapping["robo_confidence"] ) for mapping in mappings ] diff --git a/src/api/endpoints/annotate/all/get/queries/convert.py b/src/api/endpoints/annotate/all/get/queries/convert.py index 80625d3c..fedfa8a2 100644 --- a/src/api/endpoints/annotate/all/get/queries/convert.py +++ b/src/api/endpoints/annotate/all/get/queries/convert.py @@ -1,43 +1,55 @@ +import math from collections import Counter from src.api.endpoints.annotate.all.get.models.record_type import RecordTypeAnnotationResponseOuterInfo, \ RecordTypeSuggestionModel from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion from src.core.enums import RecordType +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser def convert_user_url_type_suggestion_to_url_type_annotation_suggestion( - 
db_suggestions: list[AnnotationURLTypeUser] + user_suggestions: list[AnnotationURLTypeUser], + anon_suggestions: list[AnnotationURLTypeAnon] ) -> list[URLTypeAnnotationSuggestion]: counter: Counter[URLType] = Counter() - for suggestion in db_suggestions: + for suggestion in user_suggestions: counter[suggestion.type] += 1 + + for suggestion in anon_suggestions: + counter[suggestion.url_type] += 0.5 + anno_suggestions: list[URLTypeAnnotationSuggestion] = [] for url_type, endorsement_count in counter.most_common(3): anno_suggestions.append( URLTypeAnnotationSuggestion( url_type=url_type, - endorsement_count=endorsement_count, + endorsement_count=math.floor(endorsement_count), ) ) return anno_suggestions def convert_user_record_type_suggestion_to_record_type_annotation_suggestion( - db_suggestions: list[AnnotationRecordTypeUser] + user_suggestions: list[AnnotationRecordTypeUser], + anon_suggestions: list[AnnotationRecordTypeAnon] ) -> RecordTypeAnnotationResponseOuterInfo: counter: Counter[RecordType] = Counter() - for suggestion in db_suggestions: + for suggestion in user_suggestions: counter[suggestion.record_type] += 1 + for suggestion in anon_suggestions: + counter[suggestion.record_type] += 0.5 + suggestions: list[RecordTypeSuggestionModel] = [] for record_type, endorsement_count in counter.most_common(3): suggestions.append( RecordTypeSuggestionModel( record_type=record_type, - user_count=endorsement_count, + user_count=math.floor(endorsement_count), robo_confidence=0, ) ) diff --git a/src/api/endpoints/annotate/all/get/queries/location_/core.py b/src/api/endpoints/annotate/all/get/queries/location_/core.py index 6081c5f7..5d03bc55 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/core.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/core.py @@ -15,7 +15,6 @@ def __init__( super().__init__() self.url_id = url_id - # TODO: Test async def run(self, session: AsyncSession) -> LocationAnnotationResponseOuterInfo: requester = 
GetLocationSuggestionsRequester(session) diff --git a/src/api/endpoints/annotate/all/get/queries/location_/requester.py b/src/api/endpoints/annotate/all/get/queries/location_/requester.py index 49f00f89..b8ba5410 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/requester.py @@ -6,6 +6,7 @@ from src.api.endpoints.annotate.all.get.queries._shared.sort import sort_suggestions from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser @@ -29,6 +30,9 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: ), exists_url( AnnotationLocationAutoSubtask + ), + exists_url( + AnnotationLocationAnon ) ) ) @@ -47,6 +51,20 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: ) .cte("user_suggestions") ) + # Number of anon users who suggested each location + anon_suggestions_cte = ( + select( + AnnotationLocationAnon.url_id, + AnnotationLocationAnon.location_id, + func.count(AnnotationLocationAnon.session_id).label('anon_count') + ) + .group_by( + AnnotationLocationAnon.location_id, + AnnotationLocationAnon.url_id, + ) + .cte("anon_suggestions") + ) + # Maximum confidence of robo annotation, if any robo_suggestions_cte = ( select( @@ -75,6 +93,7 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: LocationExpandedView.full_display_name.label("display_name"), func.coalesce(user_suggestions_cte.c.user_count, 0).label("user_count"), 
func.coalesce(robo_suggestions_cte.c.robo_confidence, 0).label("robo_confidence"), + func.coalesce(anon_suggestions_cte.c.anon_count, 0).label("anon_count"), ) .join( LocationExpandedView, @@ -87,6 +106,13 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: user_suggestions_cte.c.location_id == LocationExpandedView.id ) ) + .outerjoin( + anon_suggestions_cte, + and_( + anon_suggestions_cte.c.url_id == url_id, + anon_suggestions_cte.c.location_id == LocationExpandedView.id + ) + ) .outerjoin( robo_suggestions_cte, and_( @@ -97,7 +123,8 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: .where( or_( user_suggestions_cte.c.user_count > 0, - robo_suggestions_cte.c.robo_confidence > 0 + robo_suggestions_cte.c.robo_confidence > 0, + anon_suggestions_cte.c.anon_count > 0 ) ) ) @@ -105,7 +132,10 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: mappings: Sequence[RowMapping] = await self.mappings(joined_suggestions_query) suggestions: list[SuggestionModel] = [ SuggestionModel( - **mapping + id=mapping["id"], + display_name=mapping["display_name"], + user_count=mapping['user_count'] + (mapping['anon_count'] // 2), + robo_confidence=mapping["robo_confidence"] ) for mapping in mappings ] diff --git a/src/api/endpoints/annotate/all/get/queries/name/core.py b/src/api/endpoints/annotate/all/get/queries/name/core.py index 3cc1324d..b41ee4fd 100644 --- a/src/api/endpoints/annotate/all/get/queries/name/core.py +++ b/src/api/endpoints/annotate/all/get/queries/name/core.py @@ -5,6 +5,7 @@ from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion, NameAnnotationResponseOuterInfo from src.db.helpers.session import session_helper as sh +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from 
src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement @@ -28,6 +29,9 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: func.count( AnnotationNameUserEndorsement.user_id ).label('user_count'), + func.count( + AnnotationNameAnonEndorsement.session_id + ).label('anon_count'), case( (AnnotationNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1), else_=0 @@ -37,6 +41,10 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: AnnotationNameUserEndorsement, AnnotationNameUserEndorsement.suggestion_id == AnnotationNameSuggestion.id, ) + .outerjoin( + AnnotationNameAnonEndorsement, + AnnotationNameAnonEndorsement.suggestion_id == AnnotationNameSuggestion.id, + ) .where( AnnotationNameSuggestion.url_id == self.url_id, ) @@ -45,7 +53,9 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: AnnotationNameSuggestion.suggestion, ) .order_by( - func.count(AnnotationNameUserEndorsement.user_id).desc(), + (func.count(AnnotationNameUserEndorsement.user_id) + func.count( + AnnotationNameAnonEndorsement.session_id + )).desc(), AnnotationNameSuggestion.id.asc(), ) .limit(3) @@ -54,7 +64,10 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) suggestions = [ NameAnnotationSuggestion( - **mapping + id=mapping["id"], + display_name=mapping["display_name"], + user_count=mapping['user_count'] + (mapping['anon_count'] // 2), + robo_count=mapping["robo_count"] ) for mapping in mappings ] diff --git a/src/api/endpoints/pending/__init__.py b/src/api/endpoints/pending/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/pending/agencies/__init__.py b/src/api/endpoints/pending/agencies/__init__.py new file mode 100644 index
00000000..e69de29b diff --git a/src/api/endpoints/pending/agencies/approve/__init__.py b/src/api/endpoints/pending/agencies/approve/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/pending/agencies/get/__init__.py b/src/api/endpoints/pending/agencies/get/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/pending/routes.py b/src/api/endpoints/pending/routes.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/agency/sqlalchemy.py b/src/db/models/impl/agency/sqlalchemy.py index a6c9c1cf..d6ccbc3b 100644 --- a/src/db/models/impl/agency/sqlalchemy.py +++ b/src/db/models/impl/agency/sqlalchemy.py @@ -2,7 +2,7 @@ References an agency in the data sources database. """ -from sqlalchemy import Column, Integer, String, DateTime, Sequence +from sqlalchemy import Column, String from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import enum_column diff --git a/src/db/models/impl/annotation/url_type/auto/sqlalchemy.py b/src/db/models/impl/annotation/url_type/auto/sqlalchemy.py index cc5fb7b8..d882f667 100644 --- a/src/db/models/impl/annotation/url_type/auto/sqlalchemy.py +++ b/src/db/models/impl/annotation/url_type/auto/sqlalchemy.py @@ -25,4 +25,4 @@ class AnnotationAutoURLType( # Relationships - url = relationship("URL", back_populates="auto_relevant_suggestion") + url = relationship("URL") diff --git a/src/db/models/impl/annotation/url_type/user/sqlalchemy.py b/src/db/models/impl/annotation/url_type/user/sqlalchemy.py index 8f02a65d..1d71483a 100644 --- a/src/db/models/impl/annotation/url_type/user/sqlalchemy.py +++ b/src/db/models/impl/annotation/url_type/user/sqlalchemy.py @@ -29,4 +29,4 @@ class AnnotationURLTypeUser( # Relationships - url = relationship("URL", back_populates="user_relevant_suggestions") + url = relationship("URL") diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index 
a4295ae3..45e8b45b 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -4,11 +4,16 @@ from src.collectors.enums import URLStatus from src.db.models.helpers import enum_column +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate from src.db.models.impl.url.core.enums import URLSource @@ -107,23 +112,30 @@ def full_url(cls): auto_location_subtasks = relationship( AnnotationLocationAutoSubtask ) + anon_location_suggestions = relationship( + AnnotationLocationAnon) # Agency user_agency_suggestions = relationship( AnnotationAgencyUser, back_populates="url") auto_agency_subtasks = relationship( - AnnotationAgencyAutoSubtask - ) + AnnotationAgencyAutoSubtask) + anon_agency_suggestions = relationship( + AnnotationAgencyAnon) # Record Type auto_record_type_suggestion = relationship( AnnotationAutoRecordType, uselist=False, back_populates="url") 
user_record_type_suggestions = relationship( AnnotationRecordTypeUser, back_populates="url") + anon_record_type_suggestions = relationship( + AnnotationRecordTypeAnon) # Relvant/URL Type - auto_relevant_suggestion = relationship( + auto_url_type_suggestions = relationship( AnnotationAutoURLType, uselist=False, back_populates="url") - user_relevant_suggestions = relationship( + user_url_type_suggestions = relationship( AnnotationURLTypeUser, back_populates="url") + anon_url_type_suggestions = relationship( + AnnotationURLTypeAnon) reviewing_user = relationship( "ReviewingUserURL", uselist=False, back_populates="url") diff --git a/tests/automated/integration/api/annotate/all/test_anon_count.py b/tests/automated/integration/api/annotate/all/test_anon_count.py new file mode 100644 index 00000000..16fe728b --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_anon_count.py @@ -0,0 +1,124 @@ +import pytest +import uuid + +from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse +from src.core.enums import RecordType +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.anon_session.sqlalchemy import AnonymousSession +from src.db.models.impl.flag.url_validated.enums import URLType +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo +from tests.helpers.setup.final_review.core 
import setup_for_get_next_url_for_final_review + + +@pytest.mark.asyncio +async def test_anon_count( + api_test_helper, + test_agency_id: int, + pennsylvania: USStateCreationInfo, +): + """ + Test that the user annotation counts are updated correctly + when anonymous annotations are added. + """ + ath = api_test_helper + adb_client = ath.adb_client() + + # Set up URLs + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=True + ) + url_id: int = setup_info_1.url_mapping.url_id + + # Add anonymous sessions + anon_sessions: list[AnonymousSession] = [] + for i in range(12): + anon_session = AnonymousSession( + id=uuid.uuid4(), + ) + anon_sessions.append(anon_session) + await adb_client.add_all(anon_sessions) + + def get_anon_session_id(i: int) -> uuid.UUID: + return anon_sessions[i].id + + + # URL Types + url_type_annotations: list[AnnotationURLTypeAnon] = [] + for i in range(3): + url_type_annotation = AnnotationURLTypeAnon( + url_id=url_id, + session_id=get_anon_session_id(i), + url_type=URLType.DATA_SOURCE + ) + url_type_annotations.append(url_type_annotation) + await adb_client.add_all(url_type_annotations) + + + + # Record Types + record_type_annotations: list[AnnotationRecordTypeAnon] = [] + for i in range(5): + record_type_annotation = AnnotationRecordTypeAnon( + url_id=url_id, + session_id=get_anon_session_id(i), + record_type=RecordType.CAR_GPS + ) + record_type_annotations.append(record_type_annotation) + await adb_client.add_all(record_type_annotations) + + + + # Agencies + agency_annotations: list[AnnotationAgencyAnon] = [] + for i in range(7): + agency_annotation = AnnotationAgencyAnon( + url_id=url_id, + agency_id=test_agency_id, + session_id=get_anon_session_id(i) + ) + agency_annotations.append(agency_annotation) + await adb_client.add_all(agency_annotations) + + + # Locations + location_annotations: list[AnnotationLocationAnon] = [] + for i in range(9): + location_annotation = 
AnnotationLocationAnon( + url_id=url_id, + session_id=get_anon_session_id(i), + location_id=pennsylvania.location_id, + ) + location_annotations.append(location_annotation) + await adb_client.add_all(location_annotations) + + # Name + name_suggestion = AnnotationNameSuggestion( + url_id=url_id, + suggestion="Test Name", + source=NameSuggestionSource.USER, + ) + name_suggestion_id = await adb_client.add(name_suggestion, return_id=True) + + name_annotations: list[AnnotationNameAnonEndorsement] = [] + for i in range(11): + name_annotation = AnnotationNameAnonEndorsement( + suggestion_id=name_suggestion_id, + session_id=get_anon_session_id(i), + ) + name_annotations.append(name_annotation) + await adb_client.add_all(name_annotations) + + # Check that the counts are correct + get_response_1: GetNextURLForAllAnnotationResponse = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_1.next_annotation is not None + assert get_response_1.next_annotation.name_suggestions.suggestions[1].user_count == 5 + assert get_response_1.next_annotation.location_suggestions.suggestions[0].user_count == 4 + assert get_response_1.next_annotation.agency_suggestions.suggestions[0].user_count == 3 + assert get_response_1.next_annotation.record_type_suggestions.suggestions[0].user_count == 2 + assert get_response_1.next_annotation.url_type_suggestions[0].endorsement_count == 1 diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index 0ed29def..a356fa56 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -46,7 +46,7 @@ async def test_annotate_all( url_mapping_2 = setup_info_2.url_mapping # Get a valid URL to annotate - get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() + get_response_1: GetNextURLForAllAnnotationResponse = await 
ath.request_validator.get_next_url_for_all_annotations() assert get_response_1.next_annotation is not None assert len(get_response_1.next_annotation.name_suggestions.suggestions) == 1 name_suggestion = get_response_1.next_annotation.name_suggestions.suggestions[0] diff --git a/tests/automated/integration/api/pending/__init__.py b/tests/automated/integration/api/pending/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/pending/test_agencies.py b/tests/automated/integration/api/pending/test_agencies.py new file mode 100644 index 00000000..24061804 --- /dev/null +++ b/tests/automated/integration/api/pending/test_agencies.py @@ -0,0 +1,16 @@ +import pytest + +from tests.helpers.api_test_helper import APITestHelper + + +@pytest.mark.asyncio +async def test_agencies(api_test_helper: APITestHelper): + pass + + # Add pending agency + + # Call GET endpoint + + # Call APPROVE endpoint + + # Check agency is added From 9ef70ce532b00442cc6a1b9bc50683bf3293894f Mon Sep 17 00:00:00 2001 From: Max Chis Date: Wed, 24 Dec 2025 18:20:53 -0500 Subject: [PATCH 13/24] Add logic for agency proposals --- ...0ee666f15d1_add_pending_agencies_tables.py | 112 ++++++++++++ .../{pending => proposals}/__init__.py | 0 .../agencies/__init__.py | 0 .../agencies/approve/__init__.py | 0 .../proposals/agencies/approve/query.py | 152 ++++++++++++++++ .../proposals/agencies/approve/response.py | 7 + .../agencies/get/__init__.py | 0 .../endpoints/proposals/agencies/get/query.py | 56 ++++++ .../proposals/agencies/get/response.py | 18 ++ .../proposals/agencies/reject}/__init__.py | 0 .../proposals/agencies/reject/query.py | 83 +++++++++ .../proposals/agencies/reject/request.py | 5 + .../proposals/agencies/reject/response.py | 6 + src/api/endpoints/proposals/routes.py | 56 ++++++ .../routes.py => submit/agency/__init__.py} | 0 src/api/endpoints/submit/agency/enums.py | 8 + src/api/endpoints/submit/agency/helpers.py | 106 +++++++++++ 
src/api/endpoints/submit/agency/query.py | 88 ++++++++++ src/api/endpoints/submit/agency/request.py | 11 ++ src/api/endpoints/submit/agency/response.py | 9 + src/api/endpoints/submit/routes.py | 20 ++- src/api/main.py | 4 +- src/db/client/async_.py | 2 +- src/db/models/impl/proposals/__init__.py | 0 .../models/impl/proposals/agency_/__init__.py | 0 src/db/models/impl/proposals/agency_/core.py | 38 ++++ .../impl/proposals/agency_/decision_info.py | 27 +++ .../impl/proposals/agency_/link__location.py | 22 +++ src/db/models/impl/proposals/enums.py | 7 + src/db/models/mixins.py | 3 +- .../integration/api/pending/test_agencies.py | 16 -- .../integration/api/proposals/__init__.py | 0 .../api/proposals/test_agencies.py | 164 ++++++++++++++++++ 33 files changed, 998 insertions(+), 22 deletions(-) create mode 100644 alembic/versions/2025_12_21_1957-30ee666f15d1_add_pending_agencies_tables.py rename src/api/endpoints/{pending => proposals}/__init__.py (100%) rename src/api/endpoints/{pending => proposals}/agencies/__init__.py (100%) rename src/api/endpoints/{pending => proposals}/agencies/approve/__init__.py (100%) create mode 100644 src/api/endpoints/proposals/agencies/approve/query.py create mode 100644 src/api/endpoints/proposals/agencies/approve/response.py rename src/api/endpoints/{pending => proposals}/agencies/get/__init__.py (100%) create mode 100644 src/api/endpoints/proposals/agencies/get/query.py create mode 100644 src/api/endpoints/proposals/agencies/get/response.py rename {tests/automated/integration/api/pending => src/api/endpoints/proposals/agencies/reject}/__init__.py (100%) create mode 100644 src/api/endpoints/proposals/agencies/reject/query.py create mode 100644 src/api/endpoints/proposals/agencies/reject/request.py create mode 100644 src/api/endpoints/proposals/agencies/reject/response.py create mode 100644 src/api/endpoints/proposals/routes.py rename src/api/endpoints/{pending/routes.py => submit/agency/__init__.py} (100%) create mode 100644 
src/api/endpoints/submit/agency/enums.py create mode 100644 src/api/endpoints/submit/agency/helpers.py create mode 100644 src/api/endpoints/submit/agency/query.py create mode 100644 src/api/endpoints/submit/agency/request.py create mode 100644 src/api/endpoints/submit/agency/response.py create mode 100644 src/db/models/impl/proposals/__init__.py create mode 100644 src/db/models/impl/proposals/agency_/__init__.py create mode 100644 src/db/models/impl/proposals/agency_/core.py create mode 100644 src/db/models/impl/proposals/agency_/decision_info.py create mode 100644 src/db/models/impl/proposals/agency_/link__location.py create mode 100644 src/db/models/impl/proposals/enums.py delete mode 100644 tests/automated/integration/api/pending/test_agencies.py create mode 100644 tests/automated/integration/api/proposals/__init__.py create mode 100644 tests/automated/integration/api/proposals/test_agencies.py diff --git a/alembic/versions/2025_12_21_1957-30ee666f15d1_add_pending_agencies_tables.py b/alembic/versions/2025_12_21_1957-30ee666f15d1_add_pending_agencies_tables.py new file mode 100644 index 00000000..b5af2358 --- /dev/null +++ b/alembic/versions/2025_12_21_1957-30ee666f15d1_add_pending_agencies_tables.py @@ -0,0 +1,112 @@ +"""Add pending agencies tables + +Revision ID: 30ee666f15d1 +Revises: 9292faed37fd +Create Date: 2025-12-21 19:57:58.199838 + +Design notes: + +After debating it internally, I elected to have a separate pending agencies table, +rather than adding an `approval status` column to the agencies table. + +This is for a few reasons: + 1. Many existing queries and models rely on the current agency setup, + and would need to be retrofitted in order to filter + approved and unapproved agencies. + 2. Some existing links, such as between agencies and batches, between agencies and URLs, + or agency annotations for URLs, would not make sense for pending agencies, + and would be difficult to prevent in the database. 
+ +This setup does, however, make it more difficult to check for duplicates between +existing agencies and pending agencies. However, I concluded it was better for +pending agencies to be negatively affected by these design choices than +for existing agencies to be affected by the above design choices. + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import id_column, created_at_column, enum_column, agency_id_column + +# revision identifiers, used by Alembic. +revision: str = '30ee666f15d1' +down_revision: Union[str, None] = '9292faed37fd' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + + + +def upgrade() -> None: + _create_proposed_agency_table() + _create_proposed_agency_location_table() + _create_proposed_agency_decision_info_table() + +def _create_proposed_agency_decision_info_table(): + op.create_table( + "proposal__agencies__decision_info", + sa.Column("proposal_agency_id", sa.Integer(), sa.ForeignKey("proposal__agencies.id"), nullable=False), + sa.Column("deciding_user_id", sa.Integer), + sa.Column("rejection_reason", sa.String(), nullable=True), + created_at_column(), + sa.PrimaryKeyConstraint("proposal_agency_id") + ) + + +def _create_proposed_agency_table(): + op.execute("CREATE TYPE proposal_status_enum AS ENUM ('pending', 'approved', 'rejected');") + + op.create_table( + "proposal__agencies", + id_column(), + sa.Column("name", sa.String(), nullable=False), + enum_column( + column_name="agency_type", + enum_name="agency_type_enum", + ), + enum_column( + column_name="jurisdiction_type", + enum_name="jurisdiction_type_enum" + ), + sa.Column("proposing_user_id", sa.Integer(), nullable=True), + sa.Column( + "promoted_agency_id", + sa.Integer(), + sa.ForeignKey( + "agencies.id" + ) + ), + enum_column( + column_name="proposal_status", + enum_name="proposal_status_enum", + ), + created_at_column(), + sa.CheckConstraint( + 
"promoted_agency_id IS NULL OR proposal_status = 'pending'", + name="ck_agency_id_or_proposal_status" + ) + ) + +def _create_proposed_agency_location_table(): + op.create_table( + "proposal__link__agencies__locations", + sa.Column( + "proposal_agency_id", + sa.Integer(), + sa.ForeignKey("proposal__agencies.id"), + nullable=False, + ), + sa.Column( + "location_id", + sa.Integer(), + sa.ForeignKey("locations.id"), + nullable=False + ), + created_at_column(), + sa.PrimaryKeyConstraint("proposal_agency_id", "location_id") + ) + +def downgrade() -> None: + pass diff --git a/src/api/endpoints/pending/__init__.py b/src/api/endpoints/proposals/__init__.py similarity index 100% rename from src/api/endpoints/pending/__init__.py rename to src/api/endpoints/proposals/__init__.py diff --git a/src/api/endpoints/pending/agencies/__init__.py b/src/api/endpoints/proposals/agencies/__init__.py similarity index 100% rename from src/api/endpoints/pending/agencies/__init__.py rename to src/api/endpoints/proposals/agencies/__init__.py diff --git a/src/api/endpoints/pending/agencies/approve/__init__.py b/src/api/endpoints/proposals/agencies/approve/__init__.py similarity index 100% rename from src/api/endpoints/pending/agencies/approve/__init__.py rename to src/api/endpoints/proposals/agencies/approve/__init__.py diff --git a/src/api/endpoints/proposals/agencies/approve/query.py b/src/api/endpoints/proposals/agencies/approve/query.py new file mode 100644 index 00000000..3c08954e --- /dev/null +++ b/src/api/endpoints/proposals/agencies/approve/query.py @@ -0,0 +1,152 @@ +from pydantic import BaseModel +from sqlalchemy import select, func, RowMapping, update +from sqlalchemy.exc import NoResultFound +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.proposals.agencies.approve.response import ProposalAgencyApproveResponse +from src.db.models.impl.agency.enums import JurisdictionType, AgencyType +from src.db.models.impl.agency.sqlalchemy import Agency +from 
src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.agency_.decision_info import ProposalAgencyDecisionInfo +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.queries.base.builder import QueryBuilderBase + +class _ProposalAgencyIntermediateModel(BaseModel): + proposal_id: int + name: str + agency_type: AgencyType + jurisdiction_type: JurisdictionType | None + proposal_status: ProposalStatus + location_ids: list[int] + +class ProposalAgencyApproveQueryBuilder(QueryBuilderBase): + + def __init__( + self, + proposed_agency_id: int, + deciding_user_id: int + ): + super().__init__() + self.proposed_agency_id = proposed_agency_id + self.deciding_user_id = deciding_user_id + + async def run(self, session: AsyncSession) -> ProposalAgencyApproveResponse: + + # Get proposed agency + proposed_agency: _ProposalAgencyIntermediateModel | None = await self._get_proposed_agency(session=session) + if proposed_agency is None: + return ProposalAgencyApproveResponse( + message="Proposed agency not found.", + success=False + ) + + # Confirm proposed agency is pending. 
Otherwise, fail early + if proposed_agency.proposal_status != ProposalStatus.PENDING: + return ProposalAgencyApproveResponse( + message="Proposed agency is not pending.", + success=False + ) + + await self._add_decision_info(session=session) + + promoted_agency_id: int = await self._add_promoted_agency( + session=session, + proposed_agency=proposed_agency + ) + + await self._add_location_links( + session=session, + promoted_agency_id=promoted_agency_id, + location_ids=proposed_agency.location_ids + ) + + await self._update_proposed_agency_status(session=session) + + return ProposalAgencyApproveResponse( + message="Proposed agency approved.", + success=True, + agency_id=promoted_agency_id + ) + + async def _get_proposed_agency(self, session: AsyncSession) -> _ProposalAgencyIntermediateModel | None: + query = ( + select( + ProposalAgency.id, + ProposalAgency.name, + ProposalAgency.agency_type, + ProposalAgency.jurisdiction_type, + ProposalAgency.proposal_status, + func.array_agg(ProposalLinkAgencyLocation.location_id).label("location_ids") + ) + .outerjoin( + ProposalLinkAgencyLocation, + ProposalLinkAgencyLocation.proposal_agency_id == ProposalAgency.id + ) + .where( + ProposalAgency.id == self.proposed_agency_id + ) + .group_by( + ProposalAgency.id, + ProposalAgency.name, + ProposalAgency.agency_type, + ProposalAgency.jurisdiction_type + ) + ) + try: + mapping: RowMapping | None = await self.sh.mapping(session, query) + except NoResultFound: + return None + return _ProposalAgencyIntermediateModel( + proposal_id=mapping[ProposalAgency.id], + name=mapping[ProposalAgency.name], + agency_type=mapping[ProposalAgency.agency_type], + jurisdiction_type=mapping[ProposalAgency.jurisdiction_type], + proposal_status=mapping[ProposalAgency.proposal_status], + location_ids=mapping["location_ids"] if mapping["location_ids"] != [None] else [] + ) + + async def _add_decision_info(self, session: AsyncSession) -> None: + decision_info = ProposalAgencyDecisionInfo( + 
deciding_user_id=self.deciding_user_id, + proposal_agency_id=self.proposed_agency_id, + ) + session.add(decision_info) + + @staticmethod + async def _add_promoted_agency( + session: AsyncSession, + proposed_agency: _ProposalAgencyIntermediateModel + ) -> int: + agency = Agency( + name=proposed_agency.name, + agency_type=proposed_agency.agency_type, + jurisdiction_type=proposed_agency.jurisdiction_type, + ) + session.add(agency) + await session.flush() + return agency.id + + @staticmethod + async def _add_location_links( + session: AsyncSession, + promoted_agency_id: int, + location_ids: list[int] + ): + links: list[LinkAgencyLocation] = [] + for location_id in location_ids: + link = LinkAgencyLocation( + agency_id=promoted_agency_id, + location_id=location_id + ) + links.append(link) + session.add_all(links) + + async def _update_proposed_agency_status(self, session: AsyncSession) -> None: + query = update(ProposalAgency).where( + ProposalAgency.id == self.proposed_agency_id + ).values( + proposal_status=ProposalStatus.APPROVED + ) + await session.execute(query) diff --git a/src/api/endpoints/proposals/agencies/approve/response.py b/src/api/endpoints/proposals/agencies/approve/response.py new file mode 100644 index 00000000..9de62d6c --- /dev/null +++ b/src/api/endpoints/proposals/agencies/approve/response.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class ProposalAgencyApproveResponse(BaseModel): + message: str + success: bool + agency_id: int | None = None \ No newline at end of file diff --git a/src/api/endpoints/pending/agencies/get/__init__.py b/src/api/endpoints/proposals/agencies/get/__init__.py similarity index 100% rename from src/api/endpoints/pending/agencies/get/__init__.py rename to src/api/endpoints/proposals/agencies/get/__init__.py diff --git a/src/api/endpoints/proposals/agencies/get/query.py b/src/api/endpoints/proposals/agencies/get/query.py new file mode 100644 index 00000000..dde61c90 --- /dev/null +++ 
b/src/api/endpoints/proposals/agencies/get/query.py @@ -0,0 +1,56 @@ +from typing import Sequence + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import joinedload + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.api.endpoints.proposals.agencies.get.response import ProposalAgencyGetOuterResponse, ProposalAgencyGetResponse +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.queries.base.builder import QueryBuilderBase + + +class ProposalAgencyGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> ProposalAgencyGetOuterResponse: + query = ( + select( + ProposalAgency + ).where( + ProposalAgency.proposal_status == ProposalStatus.PENDING + ).options( + joinedload(ProposalAgency.locations) + ) + ) + proposal_agencies: Sequence[ProposalAgency] = ( + await session.execute(query) + ).unique().scalars().all() + if len(proposal_agencies) == 0: + return ProposalAgencyGetOuterResponse( + results=[] + ) + responses: list[ProposalAgencyGetResponse] = [] + for proposal_agency in proposal_agencies: + locations: list[AgencyGetLocationsResponse] = [] + for location in proposal_agency.locations: + location = AgencyGetLocationsResponse( + location_id=location.id, + full_display_name=location.full_display_name, + ) + locations.append(location) + + response = ProposalAgencyGetResponse( + id=proposal_agency.id, + name=proposal_agency.name, + proposing_user_id=proposal_agency.proposing_user_id, + agency_type=proposal_agency.agency_type, + jurisdiction_type=proposal_agency.jurisdiction_type, + created_at=proposal_agency.created_at, + locations=locations + ) + responses.append(response) + + return ProposalAgencyGetOuterResponse( + results=responses + ) diff --git a/src/api/endpoints/proposals/agencies/get/response.py 
b/src/api/endpoints/proposals/agencies/get/response.py new file mode 100644 index 00000000..e5a365c1 --- /dev/null +++ b/src/api/endpoints/proposals/agencies/get/response.py @@ -0,0 +1,18 @@ +from datetime import datetime + +from pydantic import BaseModel + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType + +class ProposalAgencyGetResponse(BaseModel): + id: int + name: str + proposing_user_id: int | None + agency_type: AgencyType + jurisdiction_type: JurisdictionType + locations: list[AgencyGetLocationsResponse] + created_at: datetime + +class ProposalAgencyGetOuterResponse(BaseModel): + results: list[ProposalAgencyGetResponse] \ No newline at end of file diff --git a/tests/automated/integration/api/pending/__init__.py b/src/api/endpoints/proposals/agencies/reject/__init__.py similarity index 100% rename from tests/automated/integration/api/pending/__init__.py rename to src/api/endpoints/proposals/agencies/reject/__init__.py diff --git a/src/api/endpoints/proposals/agencies/reject/query.py b/src/api/endpoints/proposals/agencies/reject/query.py new file mode 100644 index 00000000..0635a58d --- /dev/null +++ b/src/api/endpoints/proposals/agencies/reject/query.py @@ -0,0 +1,83 @@ +from pydantic import BaseModel +from sqlalchemy import select, RowMapping, update +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.proposals.agencies.reject.request import ProposalAgencyRejectRequestModel +from src.api.endpoints.proposals.agencies.reject.response import ProposalAgencyRejectResponse +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.agency_.decision_info import ProposalAgencyDecisionInfo +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.queries.base.builder import QueryBuilderBase + +class _ProposalAgencyIntermediateModel(BaseModel): + proposal_id: int 
+ proposal_status: ProposalStatus + + +class ProposalAgencyRejectQueryBuilder(QueryBuilderBase): + + def __init__( + self, + deciding_user_id: int, + proposed_agency_id: int, + request_model: ProposalAgencyRejectRequestModel + ): + super().__init__() + self.deciding_user_id = deciding_user_id + self.proposed_agency_id = proposed_agency_id + self.rejection_reason = request_model.rejection_reason + + async def run(self, session: AsyncSession) -> ProposalAgencyRejectResponse: + # Get proposed agency + proposed_agency: _ProposalAgencyIntermediateModel | None = await self._get_proposed_agency(session=session) + if proposed_agency is None: + return ProposalAgencyRejectResponse( + message="Proposed agency not found.", + success=False + ) + + # Confirm proposed agency is pending. Otherwise, fail early + if proposed_agency.proposal_status != ProposalStatus.PENDING: + return ProposalAgencyRejectResponse( + message="Proposed agency is not pending.", + success=False + ) + + await self._add_decision_info(session=session) + await self._update_proposed_agency_status(session=session) + + return ProposalAgencyRejectResponse( + message="Proposed agency rejected.", + success=True + ) + + async def _get_proposed_agency(self, session: AsyncSession) -> _ProposalAgencyIntermediateModel | None: + query = ( + select( + ProposalAgency.id.label("proposal_id"), + ProposalAgency.proposal_status + ) + .where( + ProposalAgency.id == self.proposed_agency_id + ) + ) + mapping: RowMapping | None = await self.sh.mapping(session, query) + if mapping is None: + return None + return _ProposalAgencyIntermediateModel(**mapping) + + async def _add_decision_info(self, session: AsyncSession) -> None: + decision_info = ProposalAgencyDecisionInfo( + proposal_agency_id=self.proposed_agency_id, + rejection_reason=self.rejection_reason, + deciding_user_id=self.deciding_user_id + ) + session.add(decision_info) + + async def _update_proposed_agency_status(self, session: AsyncSession) -> None: + query = 
update(ProposalAgency).where( + ProposalAgency.id == self.proposed_agency_id + ).values( + proposal_status=ProposalStatus.REJECTED + ) + await session.execute(query) diff --git a/src/api/endpoints/proposals/agencies/reject/request.py b/src/api/endpoints/proposals/agencies/reject/request.py new file mode 100644 index 00000000..8c3b1d1c --- /dev/null +++ b/src/api/endpoints/proposals/agencies/reject/request.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class ProposalAgencyRejectRequestModel(BaseModel): + rejection_reason: str \ No newline at end of file diff --git a/src/api/endpoints/proposals/agencies/reject/response.py b/src/api/endpoints/proposals/agencies/reject/response.py new file mode 100644 index 00000000..af85550b --- /dev/null +++ b/src/api/endpoints/proposals/agencies/reject/response.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class ProposalAgencyRejectResponse(BaseModel): + success: bool + message: str \ No newline at end of file diff --git a/src/api/endpoints/proposals/routes.py b/src/api/endpoints/proposals/routes.py new file mode 100644 index 00000000..8371c604 --- /dev/null +++ b/src/api/endpoints/proposals/routes.py @@ -0,0 +1,56 @@ +from fastapi import APIRouter, Depends, Path + +from src.api.dependencies import get_async_core +from src.api.endpoints.proposals.agencies.approve.query import ProposalAgencyApproveQueryBuilder +from src.api.endpoints.proposals.agencies.approve.response import ProposalAgencyApproveResponse +from src.api.endpoints.proposals.agencies.get.query import ProposalAgencyGetQueryBuilder +from src.api.endpoints.proposals.agencies.get.response import ProposalAgencyGetOuterResponse +from src.api.endpoints.proposals.agencies.reject.query import ProposalAgencyRejectQueryBuilder +from src.api.endpoints.proposals.agencies.reject.request import ProposalAgencyRejectRequestModel +from src.api.endpoints.proposals.agencies.reject.response import ProposalAgencyRejectResponse +from src.core.core import AsyncCore +from 
src.security.dtos.access_info import AccessInfo +from src.security.manager import get_access_info + +proposal_router = APIRouter(prefix="/proposal", tags=["Pending"]) + +@proposal_router.get("/agencies") +async def get_pending_agencies( + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_access_info), +) -> ProposalAgencyGetOuterResponse: + return await async_core.adb_client.run_query_builder( + ProposalAgencyGetQueryBuilder(), + ) + +@proposal_router.post("/agencies/{proposed_agency_id}/approve") +async def approve_proposed_agency( + async_core: AsyncCore = Depends(get_async_core), + proposed_agency_id: int = Path( + description="Proposed agency ID to approve" + ), + access_info: AccessInfo = Depends(get_access_info), +) -> ProposalAgencyApproveResponse: + return await async_core.adb_client.run_query_builder( + ProposalAgencyApproveQueryBuilder( + proposed_agency_id=proposed_agency_id, + deciding_user_id=access_info.user_id, + ) + ) + +@proposal_router.post("/agencies/{proposed_agency_id}/reject") +async def reject_proposed_agency( + request: ProposalAgencyRejectRequestModel, + async_core: AsyncCore = Depends(get_async_core), + proposed_agency_id: int = Path( + description="Proposed agency ID to reject" + ), + access_info: AccessInfo = Depends(get_access_info), +) -> ProposalAgencyRejectResponse: + return await async_core.adb_client.run_query_builder( + ProposalAgencyRejectQueryBuilder( + proposed_agency_id=proposed_agency_id, + deciding_user_id=access_info.user_id, + request_model=request, + ) + ) \ No newline at end of file diff --git a/src/api/endpoints/pending/routes.py b/src/api/endpoints/submit/agency/__init__.py similarity index 100% rename from src/api/endpoints/pending/routes.py rename to src/api/endpoints/submit/agency/__init__.py diff --git a/src/api/endpoints/submit/agency/enums.py b/src/api/endpoints/submit/agency/enums.py new file mode 100644 index 00000000..95e160df --- /dev/null +++ 
b/src/api/endpoints/submit/agency/enums.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class AgencyProposalRequestStatus(Enum): + SUCCESS = "SUCCESS" + PROPOSAL_DUPLICATE = "PROPOSAL_DUPLICATE" + ACCEPTED_DUPLICATE = "ACCEPTED_DUPLICATE" + ERROR = "ERROR" diff --git a/src/api/endpoints/submit/agency/helpers.py b/src/api/endpoints/submit/agency/helpers.py new file mode 100644 index 00000000..12abc550 --- /dev/null +++ b/src/api/endpoints/submit/agency/helpers.py @@ -0,0 +1,106 @@ +from sqlalchemy import func, select +from sqlalchemy.dialects.postgresql import aggregate_order_by + +from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation + + +def norm_name(col): + # POSTGRES: lower(regexp_replace(trim(name), '\s+', ' ', 'g')) + return func.lower( + func.regexp_replace(func.trim(col), r"\s+", " ", "g") + ) + +def exact_duplicates_for_approved_agency_query( + request: SubmitAgencyRequestModel, +): + link = LinkAgencyLocation + agencies = Agency + + agency_locations_cte = ( + select( + link.agency_id, + # Postgres ARRAY_AGG with deterministic ordering + func.array_agg( + aggregate_order_by( + link.location_id, + link.location_id.asc() + ) + ).label("location_ids") + ) + .group_by( + link.agency_id, + ) + .cte("agency_locations") + ) + + query = ( + select( + agencies.id, + ) + .join( + agency_locations_cte, + agency_locations_cte.c.agency_id == agencies.id + ) + .where( + norm_name(agencies.name) == request.name.lower().strip(), + agencies.jurisdiction_type == request.jurisdiction_type, + agencies.agency_type == request.agency_type, + agency_locations_cte.c.location_ids == sorted(request.location_ids), + ) + .group_by( + agencies.id, + ) + ) + + return 
query + + +def exact_duplicates_for_proposal_agency_query( + request: SubmitAgencyRequestModel, +): + link = ProposalLinkAgencyLocation + agencies = ProposalAgency + + agency_locations_cte = ( + select( + link.proposal_agency_id, + # Postgres ARRAY_AGG with deterministic ordering + func.array_agg( + aggregate_order_by( + link.location_id, + link.location_id.asc() + ) + ).label("location_ids") + ) + .group_by( + link.proposal_agency_id, + ) + .cte("agency_locations") + ) + + query = ( + select( + agencies.id, + ) + .join( + agency_locations_cte, + agency_locations_cte.c.proposal_agency_id == agencies.id + ) + .where( + norm_name(agencies.name) == request.name.lower().strip(), + agencies.jurisdiction_type == request.jurisdiction_type, + agencies.agency_type == request.agency_type, + agency_locations_cte.c.location_ids == sorted(request.location_ids), + ) + .group_by( + agencies.id, + ) + ) + + return query + + diff --git a/src/api/endpoints/submit/agency/query.py b/src/api/endpoints/submit/agency/query.py new file mode 100644 index 00000000..a59f5f12 --- /dev/null +++ b/src/api/endpoints/submit/agency/query.py @@ -0,0 +1,88 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.submit.agency.enums import AgencyProposalRequestStatus +from src.api.endpoints.submit.agency.helpers import \ + exact_duplicates_for_proposal_agency_query, exact_duplicates_for_approved_agency_query +from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel +from src.api.endpoints.submit.agency.response import SubmitAgencyProposalResponse +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.queries.base.builder import QueryBuilderBase + + +class SubmitAgencyProposalQueryBuilder(QueryBuilderBase): + + def __init__(self, request: SubmitAgencyRequestModel, user_id: int): + 
super().__init__() + self.request = request + self.user_id = user_id + + async def run(self, session: AsyncSession) -> SubmitAgencyProposalResponse: + + # Check that an agency with the same name AND location IDs does not exist + # as an approved agency + if await self._approved_agency_exists(session): + return SubmitAgencyProposalResponse( + status=AgencyProposalRequestStatus.ACCEPTED_DUPLICATE, + details="An agency with the same properties is already approved." + ) + + # Check that an agency with the same name AND location IDs does not exist + # as a proposed agency + if await self._proposed_agency_exists(session): + return SubmitAgencyProposalResponse( + status=AgencyProposalRequestStatus.PROPOSAL_DUPLICATE, + details="An agency with the same properties is already in the proposal queue." + ) + + # Add proposed agency and get proposal ID + proposal_id: int = await self._add_proposed_agency(session) + + # Add proposed agency locations + await self._add_proposed_agency_locations( + session=session, + proposal_id=proposal_id, + location_ids=self.request.location_ids + ) + + # Return response + + return SubmitAgencyProposalResponse( + proposal_id=proposal_id, + status=AgencyProposalRequestStatus.SUCCESS, + details="Successfully added proposed agency." 
+ ) + + async def _approved_agency_exists(self, session: AsyncSession) -> bool: + query = exact_duplicates_for_approved_agency_query(self.request) + return await self.sh.results_exist(session, query=query) + + async def _proposed_agency_exists(self, session: AsyncSession) -> bool: + query = exact_duplicates_for_proposal_agency_query(self.request) + return await self.sh.results_exist(session, query=query) + + async def _add_proposed_agency(self, session: AsyncSession) -> int: + proposal = ProposalAgency( + name=self.request.name, + jurisdiction_type=self.request.jurisdiction_type, + agency_type=self.request.agency_type, + proposing_user_id=self.user_id, + proposal_status=ProposalStatus.PENDING, + ) + session.add(proposal) + await session.flush() + return proposal.id + + async def _add_proposed_agency_locations( + self, + session: AsyncSession, + location_ids: list[int], + proposal_id: int + ) -> None: + for location_id in location_ids: + link = ProposalLinkAgencyLocation( + proposal_agency_id=proposal_id, + location_id=location_id + ) + session.add(link) diff --git a/src/api/endpoints/submit/agency/request.py b/src/api/endpoints/submit/agency/request.py new file mode 100644 index 00000000..8fef866a --- /dev/null +++ b/src/api/endpoints/submit/agency/request.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType + + +class SubmitAgencyRequestModel(BaseModel): + name: str + agency_type: AgencyType + jurisdiction_type: JurisdictionType + + location_ids: list[int] \ No newline at end of file diff --git a/src/api/endpoints/submit/agency/response.py b/src/api/endpoints/submit/agency/response.py new file mode 100644 index 00000000..886713a5 --- /dev/null +++ b/src/api/endpoints/submit/agency/response.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + +from src.api.endpoints.submit.agency.enums import AgencyProposalRequestStatus + + +class SubmitAgencyProposalResponse(BaseModel): + proposal_id: int | 
None = None + status: AgencyProposalRequestStatus + details: str | None \ No newline at end of file diff --git a/src/api/endpoints/submit/routes.py b/src/api/endpoints/submit/routes.py index 2eb46c15..dec7e2aa 100644 --- a/src/api/endpoints/submit/routes.py +++ b/src/api/endpoints/submit/routes.py @@ -1,11 +1,12 @@ from fastapi import APIRouter, Depends from src.api.dependencies import get_async_core - +from src.api.endpoints.submit.agency.query import SubmitAgencyProposalQueryBuilder +from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel +from src.api.endpoints.submit.agency.response import SubmitAgencyProposalResponse from src.api.endpoints.submit.data_source.models.response.duplicate import \ SubmitDataSourceURLDuplicateSubmissionResponse from src.api.endpoints.submit.data_source.models.response.standard import SubmitDataSourceURLProposalResponse -from src.api.endpoints.submit.data_source.queries.core import SubmitDataSourceURLProposalQueryBuilder from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest from src.api.endpoints.submit.data_source.wrapper import submit_data_source_url_proposal from src.api.endpoints.submit.url.models.request import URLSubmissionRequest @@ -13,7 +14,7 @@ from src.api.endpoints.submit.url.queries.core import SubmitURLQueryBuilder from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_access_info, get_standard_user_access_info submit_router = APIRouter(prefix="/submit", tags=["Submit"]) @@ -49,3 +50,16 @@ async def submit_data_source( request=request, adb_client=async_core.adb_client ) + +@submit_router.post("/agency") +async def submit_agency( + request: SubmitAgencyRequestModel, + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_standard_user_access_info) +) -> SubmitAgencyProposalResponse: + return await 
async_core.adb_client.run_query_builder( + SubmitAgencyProposalQueryBuilder( + request=request, + user_id=access_info.user_id + ) + ) diff --git a/src/api/main.py b/src/api/main.py index 87fa0d3a..a62e6fdf 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -19,6 +19,7 @@ from src.api.endpoints.locations.routes import location_url_router from src.api.endpoints.meta_url.routes import meta_urls_router from src.api.endpoints.metrics.routes import metrics_router +from src.api.endpoints.proposals.routes import proposal_router from src.api.endpoints.root import root_router from src.api.endpoints.search.routes import search_router from src.api.endpoints.submit.routes import submit_router @@ -199,7 +200,8 @@ async def redirect_docs(): data_sources_router, meta_urls_router, check_router, - location_url_router + location_url_router, + proposal_router ] for router in routers: diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 89187f11..e30c13bf 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -268,7 +268,7 @@ async def add_user_relevant_suggestion( url_id=url_id ) if prior_suggestion is not None: - prior_suggestion.type = suggested_status.value + prior_suggestion.agency_type = suggested_status.value return suggestion = AnnotationURLTypeUser( diff --git a/src/db/models/impl/proposals/__init__.py b/src/db/models/impl/proposals/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/proposals/agency_/__init__.py b/src/db/models/impl/proposals/agency_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/proposals/agency_/core.py b/src/db/models/impl/proposals/agency_/core.py new file mode 100644 index 00000000..69172768 --- /dev/null +++ b/src/db/models/impl/proposals/agency_/core.py @@ -0,0 +1,38 @@ +from sqlalchemy import Column, String, Integer, ForeignKey +from sqlalchemy.orm import Mapped, relationship + +from src.db.models.helpers import enum_column +from 
src.db.models.impl.agency.enums import JurisdictionType, AgencyType +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.models.mixins import CreatedAtMixin +from src.db.models.templates_.with_id import WithIDBase + + +class ProposalAgency( + WithIDBase, + CreatedAtMixin +): + + __tablename__ = "proposal__agencies" + + name = Column(String, nullable=False) + agency_type: Mapped[AgencyType] = enum_column(AgencyType, name="agency_type_enum") + jurisdiction_type: Mapped[JurisdictionType] = enum_column( + JurisdictionType, + name="jurisdiction_type_enum", + nullable=False, + ) + proposing_user_id: Mapped[int | None] = Column(Integer, nullable=True) + proposal_status: Mapped[ProposalStatus] = enum_column(ProposalStatus, name="proposal_status_enum") + promoted_agency_id: Mapped[int | None] = Column( + Integer, + ForeignKey("agencies.id"), + nullable=True + ) + + locations = relationship( + "LocationExpandedView", + primaryjoin="ProposalAgency.id == ProposalLinkAgencyLocation.proposal_agency_id", + secondaryjoin="LocationExpandedView.id == ProposalLinkAgencyLocation.location_id", + secondary="proposal__link__agencies__locations", + ) diff --git a/src/db/models/impl/proposals/agency_/decision_info.py b/src/db/models/impl/proposals/agency_/decision_info.py new file mode 100644 index 00000000..5cc19dd0 --- /dev/null +++ b/src/db/models/impl/proposals/agency_/decision_info.py @@ -0,0 +1,27 @@ +""" +Provides decision information on an Agency + +""" +from sqlalchemy import Column, Integer, String, ForeignKey, PrimaryKeyConstraint +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import CreatedAtMixin +from src.db.models.templates_.base import Base + + +class ProposalAgencyDecisionInfo( + Base, + CreatedAtMixin, +): + __tablename__ = "proposal__agencies__decision_info" + __table_args__ = ( + PrimaryKeyConstraint("proposal_agency_id"), + ) + + proposal_agency_id: Mapped[int] = Column( + Integer, + ForeignKey("proposal__agencies.id"), + 
nullable=False + ) + deciding_user_id: Mapped[int] = Column(Integer) + rejection_reason: Mapped[str | None] = Column(String, nullable=True) diff --git a/src/db/models/impl/proposals/agency_/link__location.py b/src/db/models/impl/proposals/agency_/link__location.py new file mode 100644 index 00000000..43d7c9fd --- /dev/null +++ b/src/db/models/impl/proposals/agency_/link__location.py @@ -0,0 +1,22 @@ +from sqlalchemy import PrimaryKeyConstraint, Column, ForeignKey, Integer +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import LocationDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base + + +class ProposalLinkAgencyLocation( + Base, + LocationDependentMixin, + CreatedAtMixin +): + __tablename__ = "proposal__link__agencies__locations" + __table_args__ = ( + PrimaryKeyConstraint("proposal_agency_id", "location_id"), + ) + + proposal_agency_id: Mapped[int] = Column( + Integer, + ForeignKey("proposal__agencies.id"), + nullable=False + ) \ No newline at end of file diff --git a/src/db/models/impl/proposals/enums.py b/src/db/models/impl/proposals/enums.py new file mode 100644 index 00000000..defd0d8c --- /dev/null +++ b/src/db/models/impl/proposals/enums.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class ProposalStatus(Enum): + PENDING = "pending" + APPROVED = "approved" + REJECTED = "rejected" \ No newline at end of file diff --git a/src/db/models/mixins.py b/src/db/models/mixins.py index 640ec955..4a8ae48f 100644 --- a/src/db/models/mixins.py +++ b/src/db/models/mixins.py @@ -1,6 +1,7 @@ from typing import ClassVar from sqlalchemy import Column, Integer, ForeignKey, TIMESTAMP, event +from sqlalchemy.orm import Mapped from src.db.models.exceptions import WriteToViewError from src.db.models.helpers import get_created_at_column, CURRENT_TIME_SERVER_DEFAULT, url_id_primary_key_constraint, \ @@ -41,7 +42,7 @@ class BatchDependentMixin: ) class LocationDependentMixin: - location_id = Column( + location_id: Mapped[int] = Column( 
Integer, ForeignKey( 'locations.id', diff --git a/tests/automated/integration/api/pending/test_agencies.py b/tests/automated/integration/api/pending/test_agencies.py deleted file mode 100644 index 24061804..00000000 --- a/tests/automated/integration/api/pending/test_agencies.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from tests.helpers.api_test_helper import APITestHelper - - -@pytest.mark.asyncio -async def test_agencies(api_test_helper: APITestHelper): - pass - - # Add pending agency - - # Call GET endpoint - - # Call APPROVE endpoint - - # Check agency is added diff --git a/tests/automated/integration/api/proposals/__init__.py b/tests/automated/integration/api/proposals/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/proposals/test_agencies.py b/tests/automated/integration/api/proposals/test_agencies.py new file mode 100644 index 00000000..70a97118 --- /dev/null +++ b/tests/automated/integration/api/proposals/test_agencies.py @@ -0,0 +1,164 @@ +import pytest + +from src.api.endpoints.proposals.agencies.approve.response import ProposalAgencyApproveResponse +from src.api.endpoints.proposals.agencies.get.response import ProposalAgencyGetOuterResponse +from src.api.endpoints.proposals.agencies.reject.request import ProposalAgencyRejectRequestModel +from src.api.endpoints.proposals.agencies.reject.response import ProposalAgencyRejectResponse +from src.api.endpoints.submit.agency.enums import AgencyProposalRequestStatus +from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel +from src.api.endpoints.submit.agency.response import SubmitAgencyProposalResponse +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from tests.automated.integration.api._helpers.RequestValidator 
import RequestValidator +from tests.automated.integration.conftest import MOCK_USER_ID +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo + + +@pytest.mark.asyncio +async def test_agencies( + api_test_helper: APITestHelper, + pittsburgh_locality: LocalityCreationInfo, + allegheny_county: CountyCreationInfo +): + request = SubmitAgencyRequestModel( + name="test_agency", + agency_type=AgencyType.LAW_ENFORCEMENT, + jurisdiction_type=JurisdictionType.LOCAL, + location_ids=[ + allegheny_county.location_id, + pittsburgh_locality.location_id + ] + ) + + rv: RequestValidator = api_test_helper.request_validator + adb_client: AsyncDatabaseClient = api_test_helper.adb_client() + # Add pending agency + submit_response_success: SubmitAgencyProposalResponse = rv.post_v3( + "/submit/agency", + expected_model=SubmitAgencyProposalResponse, + json=request.model_dump(mode="json") + ) + assert submit_response_success.status == AgencyProposalRequestStatus.SUCCESS + proposal_id: int = submit_response_success.proposal_id + + # Try to submit duplicate agency and confirm it fails + submit_response_proposal_duplicate: SubmitAgencyProposalResponse = rv.post_v3( + "/submit/agency", + expected_model=SubmitAgencyProposalResponse, + json=request.model_dump(mode="json") + ) + assert submit_response_proposal_duplicate.status == AgencyProposalRequestStatus.PROPOSAL_DUPLICATE + assert submit_response_proposal_duplicate.proposal_id is None + assert submit_response_proposal_duplicate.details == "An agency with the same properties is already in the proposal queue." 
+ + # Call GET endpoint + get_response_1: ProposalAgencyGetOuterResponse = rv.get_v3( + "/proposal/agencies", + expected_model=ProposalAgencyGetOuterResponse + ) + # Confirm agency is in response + assert len(get_response_1.results) == 1 + proposal = get_response_1.results[0] + assert proposal.id == proposal_id + assert proposal.name == request.name + assert proposal.proposing_user_id == MOCK_USER_ID + assert proposal.agency_type == request.agency_type + assert proposal.jurisdiction_type == request.jurisdiction_type + assert [loc.location_id for loc in proposal.locations] == request.location_ids + assert proposal.created_at is not None + + # Call APPROVE endpoint + approve_response: ProposalAgencyApproveResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id}/approve", + expected_model=ProposalAgencyApproveResponse + ) + assert approve_response.message == "Proposed agency approved." + assert approve_response.success + assert approve_response.agency_id is not None + agency_id: int = approve_response.agency_id + + # Check agency is added + agencies: list[Agency] = await adb_client.get_all(Agency) + assert len(agencies) == 1 + agency = agencies[0] + assert agency.name == request.name + assert agency.agency_type == request.agency_type + assert agency.jurisdiction_type == request.jurisdiction_type + + links: list[LinkAgencyLocation] = await adb_client.get_all(LinkAgencyLocation) + assert len(links) == 2 + assert {link.agency_id for link in links} == {agency.id} + assert {link.location_id for link in links} == set(request.location_ids) + + # Confirm agency is no longer in proposal queue + get_response_2: ProposalAgencyGetOuterResponse = rv.get_v3( + "/proposal/agencies", + expected_model=ProposalAgencyGetOuterResponse + ) + # Confirm agency is not in response + assert len(get_response_2.results) == 0 + + # Try to submit agency again and confirm it fails + submit_response_accepted_duplicate: SubmitAgencyProposalResponse = rv.post_v3( + "/submit/agency", 
expected_model=SubmitAgencyProposalResponse, + json=request.model_dump(mode="json") + ) + assert submit_response_accepted_duplicate.status == AgencyProposalRequestStatus.ACCEPTED_DUPLICATE + assert submit_response_accepted_duplicate.proposal_id is None + assert submit_response_accepted_duplicate.details == "An agency with the same properties is already approved." + + # Submit Separate Agency and Reject It + request_for_rejection = SubmitAgencyRequestModel( + name="Rejectable Agency", + agency_type=AgencyType.LAW_ENFORCEMENT, + jurisdiction_type=JurisdictionType.FEDERAL, + location_ids=[] + ) + submit_response_for_rejection: SubmitAgencyProposalResponse = rv.post_v3( + "/submit/agency", + expected_model=SubmitAgencyProposalResponse, + json=request_for_rejection.model_dump(mode="json") + ) + assert submit_response_for_rejection.status == AgencyProposalRequestStatus.SUCCESS + proposal_id_for_rejection: int = submit_response_for_rejection.proposal_id + + # Call REJECT endpoint + reject_response: ProposalAgencyRejectResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id_for_rejection}/reject", + expected_model=ProposalAgencyRejectResponse, + json=ProposalAgencyRejectRequestModel( + rejection_reason="Test rejection reason" + ).model_dump(mode="json") + ) + assert reject_response.success + assert reject_response.message == "Proposed agency rejected." 
+ + # Confirm does not appear in proposal queue OR final agency list + agencies = await adb_client.get_all(Agency) + assert len(agencies) == 1 + assert agencies[0].id == agency.id + + # Confirm cannot reject proposal already approved + failed_reject_response: ProposalAgencyRejectResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id}/reject", + expected_model=ProposalAgencyRejectResponse, + json=ProposalAgencyRejectRequestModel( + rejection_reason="Test rejection reason" + ).model_dump(mode="json") + ) + assert not failed_reject_response.success + assert failed_reject_response.message == "Proposed agency is not pending." + + # Confirm cannot approve proposal already rejected + failed_approve_response: ProposalAgencyApproveResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id_for_rejection}/approve", + expected_model=ProposalAgencyApproveResponse + ) + assert not failed_approve_response.success + assert failed_approve_response.message == "Proposed agency is not pending." + From 4acc9f14a10164e4695d4854160d77f3b510a9c4 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 25 Dec 2025 14:56:01 -0500 Subject: [PATCH 14/24] Add Followed Locations Sync Task --- ENV.md | 1 + ...add_link__locations__user_follows_table.py | 42 +++++++ .../scheduled/impl/sync_from_ds/__init__.py | 0 .../impl/sync_from_ds/impl/__init__.py | 0 .../sync_from_ds/impl/follows/__init__.py | 0 .../impl/sync_from_ds/impl/follows/core.py | 56 +++++++++ .../impl/follows/models/__init__.py | 0 .../follows/models/user_location_pairs.py | 19 +++ .../impl/sync_from_ds/impl/follows/query.py | 74 +++++++++++ .../impl/sync_from_ds/impl/follows/types.py | 4 + src/core/tasks/scheduled/loader.py | 10 ++ src/db/enums.py | 1 + .../models/impl/link/location__user_follow.py | 20 +++ .../pdap/_templates/request_builder.py | 18 ++- .../pdap/impl/sync/follows/__init__.py | 0 src/external/pdap/impl/sync/follows/core.py | 13 ++ .../pdap/impl/sync/follows/response.py | 9 ++ 
.../scheduled/impl/sync_from_ds/__init__.py | 0 .../sync_from_ds/user_follows/__init__.py | 0 .../sync_from_ds/user_follows/test_core.py | 115 ++++++++++++++++++ .../tasks/scheduled/loader/test_happy_path.py | 2 +- tests/manual/external/pdap/conftest.py | 15 ++- .../external/pdap/test_get_follows_sync.py | 12 ++ 23 files changed, 404 insertions(+), 7 deletions(-) create mode 100644 alembic/versions/2025_12_24_1854-e88e4e962dc7_add_link__locations__user_follows_table.py create mode 100644 src/core/tasks/scheduled/impl/sync_from_ds/__init__.py create mode 100644 src/core/tasks/scheduled/impl/sync_from_ds/impl/__init__.py create mode 100644 src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/__init__.py create mode 100644 src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/core.py create mode 100644 src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/__init__.py create mode 100644 src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/user_location_pairs.py create mode 100644 src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/query.py create mode 100644 src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/types.py create mode 100644 src/db/models/impl/link/location__user_follow.py create mode 100644 src/external/pdap/impl/sync/follows/__init__.py create mode 100644 src/external/pdap/impl/sync/follows/core.py create mode 100644 src/external/pdap/impl/sync/follows/response.py create mode 100644 tests/automated/integration/tasks/scheduled/impl/sync_from_ds/__init__.py create mode 100644 tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/__init__.py create mode 100644 tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/test_core.py create mode 100644 tests/manual/external/pdap/test_get_follows_sync.py diff --git a/ENV.md b/ENV.md index 386dbdae..6ad39c02 100644 --- a/ENV.md +++ b/ENV.md @@ -80,6 +80,7 @@ Note that some tasks/subtasks are themselves enabled by other tasks. 
| `DS_APP_SYNC_META_URL_ADD_TASK_FLAG` | Adds new meta URLs to the Data Sources App| | `DS_APP_SYNC_META_URL_UPDATE_TASK_FLAG` | Updates existing meta URLs in the Data Sources App| | `DS_APP_SYNC_META_URL_DELETE_TASK_FLAG` | Deletes meta URLs in the Data Sources App| +| `DS_APP_SYNC_USER_FOLLOWS_GET_TASK_FLAG` | Gets user follows from the Data Sources App| | `INTEGRITY_MONITOR_TASK_FLAG` | Runs integrity checks. | ### URL Task Flags diff --git a/alembic/versions/2025_12_24_1854-e88e4e962dc7_add_link__locations__user_follows_table.py b/alembic/versions/2025_12_24_1854-e88e4e962dc7_add_link__locations__user_follows_table.py new file mode 100644 index 00000000..a2b82ff0 --- /dev/null +++ b/alembic/versions/2025_12_24_1854-e88e4e962dc7_add_link__locations__user_follows_table.py @@ -0,0 +1,42 @@ +"""Add link__locations__user_follows table + +Revision ID: e88e4e962dc7 +Revises: 30ee666f15d1 +Create Date: 2025-12-24 18:54:38.897466 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import add_enum_value, location_id_column, user_id_column, created_at_column + +# revision identifiers, used by Alembic. 
+revision: str = 'e88e4e962dc7' +down_revision: Union[str, None] = '30ee666f15d1' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +def upgrade() -> None: + _add_link_locations_user_follows_table() + _add_follows_sync_task() + +def _add_link_locations_user_follows_table(): + op.create_table( + "link__locations__user_follows", + location_id_column(), + user_id_column(), + created_at_column(), + sa.PrimaryKeyConstraint("location_id", "user_id"), + ) + + +def _add_follows_sync_task(): + add_enum_value( + enum_name="task_type", + enum_value="Sync User Follows Get" + ) + +def downgrade() -> None: + pass diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/__init__.py b/src/core/tasks/scheduled/impl/sync_from_ds/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/__init__.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/__init__.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/core.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/core.py new file mode 100644 index 00000000..c26f2525 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/core.py @@ -0,0 +1,56 @@ +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.models.user_location_pairs import UserLocationPairs +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.query import UpdateFollowsInDBQueryBuilder +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.types import UserID, LocationID +from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase +from src.db.client.async_ import AsyncDatabaseClient +from src.db.enums import TaskType +from 
src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.follows.core import GetFollowsRequestBuilder +from src.external.pdap.impl.sync.follows.response import SyncFollowGetInnerResponse + + +class DSAppSyncUserFollowsGetTaskOperator(ScheduledTaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + pdap_client: PDAPClient + ): + super().__init__(adb_client) + self.pdap_client = pdap_client + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_USER_FOLLOWS_GET + + async def inner_task_logic(self) -> None: + responses = await self._get_follows_from_ds() + await self._update_follows_in_db(responses) + + async def _get_follows_from_ds(self) -> list[SyncFollowGetInnerResponse]: + return await self.pdap_client.run_request_builder( + GetFollowsRequestBuilder() + ) + + async def _update_follows_in_db(self, responses: list[SyncFollowGetInnerResponse]) -> None: + # Get response tuples + api_pairs: list[UserLocationPairs] = [ + UserLocationPairs( + user_id=UserID(response.user_id), + location_id=LocationID(response.location_id) + ) + for response in responses + ] + # Run query + await self.adb_client.run_query_builder( + UpdateFollowsInDBQueryBuilder(api_pairs=api_pairs) + ) + # + # async def _get_follows_in_db(self) -> list[tuple[int, int]]: + # query = ( + # select( + # LinkLocationUserFollow.user_id, + # LinkLocationUserFollow.location_id + # ) + # ) + # mappings: Sequence[RowMapping] = await self.adb_client.mappings(query) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/__init__.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/user_location_pairs.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/user_location_pairs.py new file mode 100644 index 00000000..58664fbd --- /dev/null +++ 
b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/user_location_pairs.py @@ -0,0 +1,19 @@ +""" + +Design Notes: + - I contemplated having this be a simple tuple, but reasoned it'd be more future-proof + if I used a Pydantic Model, so it would fail loudly in case the API response + structure changes. + +""" + +from pydantic import BaseModel + +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.types import LocationID, UserID + +class UserLocationPairs(BaseModel): + user_id: UserID + location_id: LocationID + + class Config: + frozen = True \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/query.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/query.py new file mode 100644 index 00000000..0f78a3da --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/query.py @@ -0,0 +1,74 @@ +from typing import Any, Sequence + +from sqlalchemy import select, RowMapping, delete, tuple_ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.models.user_location_pairs import UserLocationPairs +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.types import UserID, LocationID +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.db.queries.base.builder import QueryBuilderBase + +class UpdateFollowsInDBQueryBuilder(QueryBuilderBase): + + def __init__(self, api_pairs: list[UserLocationPairs]): + super().__init__() + self.api_pairs = api_pairs + + async def run(self, session: AsyncSession) -> Any: + db_pairs: list[UserLocationPairs] = await self.get_db_pairs(session) + api_pairs_set = set(self.api_pairs) + db_pairs_set = set(db_pairs) + # Get all pairs that are in the API but not in the DB + new_pairs = api_pairs_set - db_pairs_set + # Get all pairs that are in the DB but not in the API + removed_pairs = db_pairs_set - api_pairs_set + + await self.add_new_links(session, new_pairs) 
+ await self.remove_links(session, removed_pairs) + + + async def get_db_pairs(self, session: AsyncSession) -> list[UserLocationPairs]: + query = ( + select( + LinkLocationUserFollow.user_id, + LinkLocationUserFollow.location_id + ) + ) + mappings: Sequence[RowMapping] = await self.sh.mappings(session, query=query) + return [ + UserLocationPairs( + user_id=mapping[LinkLocationUserFollow.user_id], + location_id=mapping[LinkLocationUserFollow.location_id] + ) + for mapping in mappings + ] + + async def add_new_links( + self, + session: AsyncSession, + pairs: set[UserLocationPairs] + ) -> None: + for pair in pairs: + link = LinkLocationUserFollow( + user_id=pair.user_id, + location_id=pair.location_id + ) + session.add(link) + + async def remove_links( + self, + session: AsyncSession, + removed_pairs: set[UserLocationPairs] + ) -> None: + tuples: list[tuple[UserID, LocationID]] = [ + (pair.user_id, pair.location_id) + for pair in removed_pairs + ] + statement = delete(LinkLocationUserFollow).where( + tuple_( + LinkLocationUserFollow.user_id, + LinkLocationUserFollow.location_id, + ).in_(tuples) + ) + await session.execute(statement) + diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/types.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/types.py new file mode 100644 index 00000000..b3dc8e5b --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/types.py @@ -0,0 +1,4 @@ +from typing import NewType + +UserID = NewType("UserID", int) +LocationID = NewType("LocationID", int) diff --git a/src/core/tasks/scheduled/loader.py b/src/core/tasks/scheduled/loader.py index 61169a66..d2e96cc1 100644 --- a/src/core/tasks/scheduled/loader.py +++ b/src/core/tasks/scheduled/loader.py @@ -12,6 +12,7 @@ from src.core.tasks.scheduled.impl.mark_never_completed.operator import MarkTaskNeverCompletedOperator from src.core.tasks.scheduled.impl.refresh_materialized_views.operator import RefreshMaterializedViewsOperator from 
src.core.tasks.scheduled.impl.run_url_tasks.operator import RunURLTasksTaskOperator +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.core import DSAppSyncUserFollowsGetTaskOperator from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.core import DSAppSyncAgenciesAddTaskOperator from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.core import DSAppSyncAgenciesDeleteTaskOperator from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.core import DSAppSyncAgenciesUpdateTaskOperator @@ -136,6 +137,15 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: enabled=self.setup_flag("INTEGRITY_MONITOR_TASK_FLAG") ), # Sync + ## Get + ScheduledTaskEntry( + operator=DSAppSyncUserFollowsGetTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ), + interval_minutes=IntervalEnum.DAILY.value, + enabled=self.setup_flag("DS_APP_SYNC_USER_FOLLOWS_GET_TASK_FLAG") + ), ## Adds ### Agency ScheduledTaskEntry( diff --git a/src/db/enums.py b/src/db/enums.py index 65f446c5..97e2cc4b 100644 --- a/src/db/enums.py +++ b/src/db/enums.py @@ -75,6 +75,7 @@ class TaskType(PyEnum): SYNC_META_URLS_ADD = "Sync Meta URLs Add" SYNC_META_URLS_UPDATE = "Sync Meta URLs Update" SYNC_META_URLS_DELETE = "Sync Meta URLs Delete" + SYNC_USER_FOLLOWS_GET = "Sync User Follows Get" class ChangeLogOperationType(PyEnum): INSERT = "INSERT" diff --git a/src/db/models/impl/link/location__user_follow.py b/src/db/models/impl/link/location__user_follow.py new file mode 100644 index 00000000..a4f65281 --- /dev/null +++ b/src/db/models/impl/link/location__user_follow.py @@ -0,0 +1,20 @@ +from sqlalchemy import Integer, Column, PrimaryKeyConstraint + +from src.db.models.mixins import LocationDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base + + +class LinkLocationUserFollow( + Base, + LocationDependentMixin, + CreatedAtMixin +): + __tablename__ = "link__locations__user_follows" + __table_args__ = ( + 
PrimaryKeyConstraint( + "user_id", + "location_id" + ), + ) + + user_id = Column(Integer, nullable=False) diff --git a/src/external/pdap/_templates/request_builder.py b/src/external/pdap/_templates/request_builder.py index 2cde6c51..887e2cfd 100644 --- a/src/external/pdap/_templates/request_builder.py +++ b/src/external/pdap/_templates/request_builder.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from http import HTTPStatus -from typing import Any +from typing import Any, TypeVar from pdap_access_manager.access_manager.async_ import AccessManagerAsync from pdap_access_manager.enums import RequestType @@ -8,6 +8,7 @@ from pdap_access_manager.models.response import ResponseInfo from pydantic import BaseModel +T = TypeVar("T", bound=BaseModel) class PDAPRequestBuilderBase(ABC): @@ -37,6 +38,21 @@ async def post( raise Exception(f"Failed to make request to PDAP: {response_info.data}") return response_info.data + async def get( + self, + url: str, + model: type[T] + ) -> T: + request_info = RequestInfo( + type_=RequestType.GET, + url=url, + headers=await self.access_manager.jwt_header() + ) + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + if response_info.status_code != HTTPStatus.OK: + raise Exception(f"Failed to make request to PDAP: {response_info.data}") + return model(**response_info.data) + @abstractmethod async def inner_logic(self) -> Any: raise NotImplementedError diff --git a/src/external/pdap/impl/sync/follows/__init__.py b/src/external/pdap/impl/sync/follows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/follows/core.py b/src/external/pdap/impl/sync/follows/core.py new file mode 100644 index 00000000..707ac8c9 --- /dev/null +++ b/src/external/pdap/impl/sync/follows/core.py @@ -0,0 +1,13 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.follows.response import SyncFollowGetInnerResponse, 
SyncFollowGetOuterResponse + + +class GetFollowsRequestBuilder(PDAPRequestBuilderBase): + + async def inner_logic(self) -> list[SyncFollowGetInnerResponse]: + url: str = self.build_url("v3/sync/follows") + response: SyncFollowGetOuterResponse = await self.get( + url=url, + model=SyncFollowGetOuterResponse + ) + return response.follows diff --git a/src/external/pdap/impl/sync/follows/response.py b/src/external/pdap/impl/sync/follows/response.py new file mode 100644 index 00000000..abdde583 --- /dev/null +++ b/src/external/pdap/impl/sync/follows/response.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + + +class SyncFollowGetInnerResponse(BaseModel): + user_id: int + location_id: int + +class SyncFollowGetOuterResponse(BaseModel): + follows: list[SyncFollowGetInnerResponse] diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/test_core.py b/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/test_core.py new file mode 100644 index 00000000..b95eb102 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/test_core.py @@ -0,0 +1,115 @@ +from http import HTTPStatus +from unittest.mock import AsyncMock + +import pytest +from pdap_access_manager.models.response import ResponseInfo + +from src.core.tasks.base.run_info import TaskOperatorRunInfo +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.core import DSAppSyncUserFollowsGetTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from 
src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.follows.response import SyncFollowGetInnerResponse, SyncFollowGetOuterResponse +from tests.automated.integration.conftest import MOCK_USER_ID +from tests.helpers.asserts import assert_task_run_success +from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo + + +def mock_client( + mock_pdap_client: PDAPClient, + response: list[SyncFollowGetInnerResponse] +) -> None: + mock_pdap_client.access_manager.make_request = AsyncMock( + return_value=ResponseInfo( + status_code=HTTPStatus.OK, + data=SyncFollowGetOuterResponse( + follows=response + ).model_dump(mode='json') + ) + ) + +@pytest.mark.asyncio +async def test_core( + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient, + pittsburgh_locality: LocalityCreationInfo, + allegheny_county: CountyCreationInfo, + pennsylvania: USStateCreationInfo +): + operator = DSAppSyncUserFollowsGetTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) + + # Mock client to add 3 new follows + mock_client( + mock_pdap_client, + response=[ + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=pittsburgh_locality.location_id + ), + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=allegheny_county.location_id + ), + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=pennsylvania.location_id + ) + ] + ) + + # # Run Task + run_info: TaskOperatorRunInfo = await operator.run_task() + assert_task_run_success(run_info) + + # confirm three follows added + links: list[LinkLocationUserFollow] = await adb_client_test.get_all(LinkLocationUserFollow) + assert len(links) == 3 + link_tuples = 
[(link.user_id, link.location_id) for link in links] + assert (MOCK_USER_ID, pittsburgh_locality.location_id) in link_tuples + assert (MOCK_USER_ID, allegheny_county.location_id) in link_tuples + assert (MOCK_USER_ID, pennsylvania.location_id) in link_tuples + + # # Run Task again + run_info: TaskOperatorRunInfo = await operator.run_task() + assert_task_run_success(run_info) + + # # Confirm no new follows added + links: list[LinkLocationUserFollow] = await adb_client_test.get_all(LinkLocationUserFollow) + assert len(links) == 3 + link_tuples = [(link.user_id, link.location_id) for link in links] + assert (MOCK_USER_ID, pittsburgh_locality.location_id) in link_tuples + assert (MOCK_USER_ID, allegheny_county.location_id) in link_tuples + assert (MOCK_USER_ID, pennsylvania.location_id) in link_tuples + + + # Mock client to add only two of the follows + mock_client( + mock_pdap_client, + response=[ + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=pittsburgh_locality.location_id + ), + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=allegheny_county.location_id + ), + ] + ) + + # # Run Task again + run_info: TaskOperatorRunInfo = await operator.run_task() + assert_task_run_success(run_info) + # Confirm one of the follows is removed + + links: list[LinkLocationUserFollow] = await adb_client_test.get_all(LinkLocationUserFollow) + assert len(links) == 2 + link_tuples = [(link.user_id, link.location_id) for link in links] + assert (MOCK_USER_ID, pittsburgh_locality.location_id) in link_tuples + assert (MOCK_USER_ID, allegheny_county.location_id) in link_tuples + diff --git a/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py b/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py index 4e5bb551..cb70ff8c 100644 --- a/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py +++ b/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py @@ -2,7 +2,7 @@ from 
src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader -NUMBER_OF_ENTRIES = 21 +NUMBER_OF_ENTRIES = 22 @pytest.mark.asyncio async def test_happy_path( diff --git a/tests/manual/external/pdap/conftest.py b/tests/manual/external/pdap/conftest.py index de386ad7..51c1947c 100644 --- a/tests/manual/external/pdap/conftest.py +++ b/tests/manual/external/pdap/conftest.py @@ -1,7 +1,8 @@ import pytest import pytest_asyncio from aiohttp import ClientSession -from pdap_access_manager import AccessManager +from pdap_access_manager.access_manager.async_ import AccessManagerAsync as AccessManager +from pdap_access_manager.models.auth import AuthInfo from src.external.pdap.client import PDAPClient from src.util.helper_functions import get_from_env @@ -15,8 +16,10 @@ async def client_session(): @pytest.fixture def access_manager(client_session): return AccessManager( - email=get_from_env("PDAP_PROD_EMAIL"), - password=get_from_env("PDAP_PROD_PASSWORD"), + auth=AuthInfo( + email=get_from_env("PDAP_PROD_EMAIL"), + password=get_from_env("PDAP_PROD_PASSWORD"), + ), api_key=get_from_env("PDAP_API_KEY", allow_none=True), session=client_session ) @@ -24,8 +27,10 @@ def access_manager(client_session): @pytest.fixture def access_manager_dev(client_session): return AccessManager( - email=get_from_env("PDAP_DEV_EMAIL"), - password=get_from_env("PDAP_DEV_PASSWORD"), + auth=AuthInfo( + email=get_from_env("PDAP_DEV_EMAIL"), + password=get_from_env("PDAP_DEV_PASSWORD"), + ), api_key=get_from_env("PDAP_DEV_API_KEY", allow_none=True), data_sources_url=get_from_env("PDAP_DEV_API_URL"), session=client_session diff --git a/tests/manual/external/pdap/test_get_follows_sync.py b/tests/manual/external/pdap/test_get_follows_sync.py new file mode 100644 index 00000000..9d62209b --- /dev/null +++ b/tests/manual/external/pdap/test_get_follows_sync.py @@ -0,0 +1,12 @@ +import pytest + +from src.external.pdap.impl.sync.follows.core import GetFollowsRequestBuilder + + +@pytest.mark.asyncio +async 
def test_get_follows_sync(pdap_client_dev): + + response = await pdap_client_dev.run_request_builder( + GetFollowsRequestBuilder() + ) + print(response) From 2607ef3bbad015bfdd3091c1d3498fc691eb8376 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Fri, 26 Dec 2025 15:33:31 -0500 Subject: [PATCH 15/24] Refine sorting --- ...933d84aa52_revise_annotation_count_view.py | 210 ++++++++++++++++++ .../annotate/_shared/queries/helper.py | 79 ++++--- .../annotate/all/get/queries/core.py | 79 ++++--- .../all/get/queries/features/README.md | 1 + .../all/get/queries/features/__init__.py | 0 .../queries/features/followed_by_any_user.py | 27 +++ .../get/queries/features/followed_by_user.py | 30 +++ .../annotate/all/get/queries/helpers.py | 26 +++ .../annotate/anonymous/get/helpers.py | 10 +- .../endpoints/annotate/anonymous/get/query.py | 59 +++-- src/db/models/impl/__init__.py | 3 + src/db/models/views/url_anno_count.py | 6 +- .../api/annotate/all/test_sorting.py | 67 +++++- 13 files changed, 505 insertions(+), 92 deletions(-) create mode 100644 alembic/versions/2025_12_26_1527-42933d84aa52_revise_annotation_count_view.py create mode 100644 src/api/endpoints/annotate/all/get/queries/features/README.md create mode 100644 src/api/endpoints/annotate/all/get/queries/features/__init__.py create mode 100644 src/api/endpoints/annotate/all/get/queries/features/followed_by_any_user.py create mode 100644 src/api/endpoints/annotate/all/get/queries/features/followed_by_user.py create mode 100644 src/api/endpoints/annotate/all/get/queries/helpers.py diff --git a/alembic/versions/2025_12_26_1527-42933d84aa52_revise_annotation_count_view.py b/alembic/versions/2025_12_26_1527-42933d84aa52_revise_annotation_count_view.py new file mode 100644 index 00000000..241c7845 --- /dev/null +++ b/alembic/versions/2025_12_26_1527-42933d84aa52_revise_annotation_count_view.py @@ -0,0 +1,210 @@ +"""Revise annotation count view + +Revision ID: 42933d84aa52 +Revises: e88e4e962dc7 +Create Date: 2025-12-26 
15:27:30.368862 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '42933d84aa52' +down_revision: Union[str, None] = 'e88e4e962dc7' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute("""DROP VIEW IF EXISTS url_annotation_count_view""") + op.execute( + """ + CREATE VIEW url_annotation_count_view AS + WITH + auto_location_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__location__auto__subtasks anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , auto_agency_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__agency__auto__subtasks anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , auto_url_type_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__url_type__auto anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , auto_record_type_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__record_type__auto anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , user_location_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__location__user anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , user_agency_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__agency__user anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , user_url_type_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__url_type__user anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , user_record_type_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__record_type__user anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , 
anon_location_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__location__anon anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , anon_agency_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__agency__anon anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , anon_url_type_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__url_type__anon anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + , anon_record_type_count AS ( + SELECT + u_1.id, + count(anno.url_id) AS cnt + FROM + urls u_1 + JOIN annotation__record_type__anon anno + ON u_1.id = anno.url_id + GROUP BY + u_1.id + ) + SELECT + u.id AS url_id, + COALESCE(auto_ag.cnt, 0::bigint) AS auto_agency_count, + COALESCE(auto_loc.cnt, 0::bigint) AS auto_location_count, + COALESCE(auto_rec.cnt, 0::bigint) AS auto_record_type_count, + COALESCE(auto_typ.cnt, 0::bigint) AS auto_url_type_count, + COALESCE(user_ag.cnt, 0::bigint) AS user_agency_count, + COALESCE(user_loc.cnt, 0::bigint) AS user_location_count, + COALESCE(user_rec.cnt, 0::bigint) AS user_record_type_count, + COALESCE(user_typ.cnt, 0::bigint) AS user_url_type_count, + COALESCE(anon_ag.cnt, 0::bigint) AS anon_agency_count, + COALESCE(anon_loc.cnt, 0::bigint) AS anon_location_count, + COALESCE(anon_rec.cnt, 0::bigint) AS anon_record_type_count, + COALESCE(anon_typ.cnt, 0::bigint) AS anon_url_type_count, + COALESCE(auto_ag.cnt, 0::bigint) + COALESCE(auto_loc.cnt, 0::bigint) + COALESCE(auto_rec.cnt, 0::bigint) + + COALESCE(auto_typ.cnt, 0::bigint) + COALESCE(user_ag.cnt, 0::bigint) + COALESCE(user_loc.cnt, 0::bigint) + + COALESCE(user_rec.cnt, 0::bigint) + COALESCE(user_typ.cnt, 0::bigint) + COALESCE(anon_ag.cnt, 0::bigint) + + COALESCE(anon_loc.cnt, 0::bigint) + COALESCE(anon_rec.cnt, 0::bigint) + COALESCE(anon_typ.cnt, 0::bigint) AS total_anno_count + + FROM + urls u + LEFT JOIN auto_agency_count auto_ag + ON 
auto_ag.id = u.id + LEFT JOIN auto_location_count auto_loc + ON auto_loc.id = u.id + LEFT JOIN auto_record_type_count auto_rec + ON auto_rec.id = u.id + LEFT JOIN auto_url_type_count auto_typ + ON auto_typ.id = u.id + LEFT JOIN user_agency_count user_ag + ON user_ag.id = u.id + LEFT JOIN user_location_count user_loc + ON user_loc.id = u.id + LEFT JOIN user_record_type_count user_rec + ON user_rec.id = u.id + LEFT JOIN user_url_type_count user_typ + ON user_typ.id = u.id + LEFT JOIN anon_agency_count anon_ag + ON anon_ag.id = u.id + LEFT JOIN anon_location_count anon_loc + ON anon_loc.id = u.id + LEFT JOIN anon_record_type_count anon_rec + ON anon_rec.id = u.id + LEFT JOIN anon_url_type_count anon_typ + ON anon_typ.id = u.id + + """ + ) + + +def downgrade() -> None: + pass diff --git a/src/api/endpoints/annotate/_shared/queries/helper.py b/src/api/endpoints/annotate/_shared/queries/helper.py index f8bdf033..76def5c1 100644 --- a/src/api/endpoints/annotate/_shared/queries/helper.py +++ b/src/api/endpoints/annotate/_shared/queries/helper.py @@ -2,7 +2,7 @@ This module contains helper functions for the annotate GET queries """ -from sqlalchemy import Select, case, exists, select +from sqlalchemy import Select, case, CTE, ColumnElement from sqlalchemy.orm import joinedload from src.collectors.enums import URLStatus @@ -15,10 +15,9 @@ from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView -def get_select() -> Select: - return ( - Select(URL) - +def add_joins(query: Select) -> Select: + query = ( + query .join( URLAnnotationFlagsView, URLAnnotationFlagsView.url_id == URL.id @@ -28,10 +27,12 @@ def get_select() -> Select: URLAnnotationCount.url_id == URL.id ) ) + return query -def conclude(query: Select) -> Select: - # Add common where conditions - query = query.where( +def add_common_where_conditions( + query: Select, +) -> Select: + return query.where( URL.status == URLStatus.OK.value, not_exists_url( FlagURLSuspended @@ -42,29 +43,41 @@ def
conclude(query: Select) -> Select: ) ) - - query = ( - # Add load options - query.options( - joinedload(URL.html_content), - joinedload(URL.user_url_type_suggestions), - joinedload(URL.user_record_type_suggestions), - joinedload(URL.anon_record_type_suggestions), - joinedload(URL.anon_url_type_suggestions), - ) - # Sorting Priority - .order_by( - # Privilege manually submitted URLs first - case( - (URL.source == URLSource.MANUAL, 0), - else_=1 - ).asc(), - # Break ties by favoring URL with higher total annotations - URLAnnotationCount.total_anno_count.desc(), - # Break additional ties by favoring least recently created URLs - URL.id.asc() - ) - # Limit to 1 result - .limit(1) +def add_load_options( + query: Select +) -> Select: + return query.options( + joinedload(URL.html_content), + joinedload(URL.user_url_type_suggestions), + joinedload(URL.user_record_type_suggestions), + joinedload(URL.anon_record_type_suggestions), + joinedload(URL.anon_url_type_suggestions), ) - return query \ No newline at end of file + +def bool_sort( + condition: ColumnElement[bool] +) -> ColumnElement[int]: + return case( + (condition, 0), + else_=1 + ).asc() + +def common_sorts( + base_cte: CTE +) -> list[ColumnElement[int]]: + return [ + # Privilege URLs whose batches are associated with locations + # followed by ANY user + bool_sort(base_cte.c.followed_by_any_user), + # Privilege Manually Submitted URLs + bool_sort(URL.source == URLSource.MANUAL), + # Privilege based on total number of user annotations + URLAnnotationCount.user_url_type_count.desc(), + # Privilege based on total number of anon annotations + URLAnnotationCount.anon_url_type_count.desc(), + # Privilege based on total number of auto annotations + URLAnnotationCount.auto_url_type_count.desc(), + # Break additional ties by favoring least recently created URLs + URL.id.asc() + ] + diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index 852886c6..a382f0b4 
100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -1,9 +1,12 @@ -from sqlalchemy import exists, select +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result from src.api.endpoints.annotate._shared.queries import helper from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse +from src.api.endpoints.annotate.all.get.queries.features.followed_by_any_user import get_followed_by_any_user_feature +from src.api.endpoints.annotate.all.get.queries.features.followed_by_user import get_followed_by_user_feature +from src.api.endpoints.annotate.all.get.queries.helpers import not_exists_user_annotation from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser @@ -30,55 +33,63 @@ async def run( self, session: AsyncSession ) -> GetNextURLForAllAnnotationResponse: - query = helper.get_select() + base_cte = select( + URL.id, + get_followed_by_user_feature(self.user_id), + get_followed_by_any_user_feature() + ).cte("base") + + query = select( + URL, + base_cte.c.followed_by_user, + base_cte.c.followed_by_any_user, + ).join( + base_cte, + base_cte.c.id == URL.id + ) + query = helper.add_joins(query) # Add user annotation-specific joins and conditions if self.batch_id is not None: query = query.join(LinkBatchURL).where(LinkBatchURL.batch_id == self.batch_id) if self.url_id is not None: query = query.where(URL.id == self.url_id) + + user_models = [ + AnnotationURLTypeUser, + AnnotationAgencyUser, + AnnotationLocationUser, + AnnotationRecordTypeUser, + ] + query = ( query .where( # Must not have been previously annotated by user - ~exists( - 
select(AnnotationURLTypeUser.url_id) - .where( - AnnotationURLTypeUser.url_id == URL.id, - AnnotationURLTypeUser.user_id == self.user_id, - ) - ), - ~exists( - select(AnnotationAgencyUser.url_id) - .where( - AnnotationAgencyUser.url_id == URL.id, - AnnotationAgencyUser.user_id == self.user_id, - ) - ), - ~exists( - select( - AnnotationLocationUser.url_id - ) - .where( - AnnotationLocationUser.url_id == URL.id, - AnnotationLocationUser.user_id == self.user_id, - ) - ), - ~exists( - select( - AnnotationRecordTypeUser.url_id - ) - .where( - AnnotationRecordTypeUser.url_id == URL.id, - AnnotationRecordTypeUser.user_id == self.user_id, - ) + *[ + not_exists_user_annotation( + user_id=self.user_id, + user_model=user_model ) + for user_model in user_models + ] ) ) # Conclude query with limit and sorting - query = helper.conclude(query) + query = helper.add_common_where_conditions(query) + query = helper.add_load_options(query) + query = ( + # Sorting Priority + query.order_by( + # If the specific user follows *this* location, privilege it + helper.bool_sort(base_cte.c.followed_by_user), + *helper.common_sorts(base_cte) + ) + # Limit to 1 result + .limit(1) + ) raw_results = (await session.execute(query)).unique() url: URL | None = raw_results.scalars().one_or_none() diff --git a/src/api/endpoints/annotate/all/get/queries/features/README.md b/src/api/endpoints/annotate/all/get/queries/features/README.md new file mode 100644 index 00000000..e37fe6e5 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/features/README.md @@ -0,0 +1 @@ +"Features" in this case refers to EXISTs subqueries which are separately calculated and used for sorting. 
\ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/features/__init__.py b/src/api/endpoints/annotate/all/get/queries/features/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/annotate/all/get/queries/features/followed_by_any_user.py b/src/api/endpoints/annotate/all/get/queries/features/followed_by_any_user.py new file mode 100644 index 00000000..e14ddddd --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/features/followed_by_any_user.py @@ -0,0 +1,27 @@ +from sqlalchemy import exists, select, literal, Exists + +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch +from src.db.models.impl.url.core.sqlalchemy import URL + + +def get_followed_by_any_user_feature() -> Exists: + query = ( + exists( + select(literal(1)) + .select_from(LinkBatchURL) + .join( + LinkLocationBatch, + LinkLocationBatch.batch_id == LinkBatchURL.batch_id + ) + .join( + LinkLocationUserFollow, + LinkLocationUserFollow.location_id == LinkLocationBatch.location_id + ) + .where( + URL.id == LinkBatchURL.url_id, + ) + ).label("followed_by_any_user") + ) + return query \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/features/followed_by_user.py b/src/api/endpoints/annotate/all/get/queries/features/followed_by_user.py new file mode 100644 index 00000000..b73d4cd4 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/features/followed_by_user.py @@ -0,0 +1,30 @@ +from sqlalchemy import exists, select, literal, Exists + +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch +from src.db.models.impl.url.core.sqlalchemy 
import URL + + +def get_followed_by_user_feature( + user_id: int +) -> Exists: + query = ( + exists( + select(literal(1)) + .select_from(LinkBatchURL) + .join( + LinkLocationBatch, + LinkLocationBatch.batch_id == LinkBatchURL.batch_id + ) + .join( + LinkLocationUserFollow, + LinkLocationUserFollow.location_id == LinkLocationBatch.location_id + ) + .where( + URL.id == LinkBatchURL.url_id, + LinkLocationUserFollow.user_id == user_id + ) + ).label("followed_by_user") + ) + return query \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/helpers.py b/src/api/endpoints/annotate/all/get/queries/helpers.py new file mode 100644 index 00000000..da112099 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/helpers.py @@ -0,0 +1,26 @@ +from typing import Protocol, TypeVar + +from sqlalchemy import ColumnElement, select, exists + +from src.db.models.impl.url.core.sqlalchemy import URL + + +class UserURLModelProtocol( + Protocol, +): + user_id: ColumnElement[int] + url_id: ColumnElement[int] + +UserModel = TypeVar("UserModel", bound=UserURLModelProtocol) + +def not_exists_user_annotation( + user_id: int, + user_model: UserModel +) -> ColumnElement[bool]: + return ~exists( + select(user_model.url_id) + .where( + user_model.url_id == URL.id, + user_model.user_id == user_id, + ) + ) \ No newline at end of file diff --git a/src/api/endpoints/annotate/anonymous/get/helpers.py b/src/api/endpoints/annotate/anonymous/get/helpers.py index 83a10845..96a15680 100644 --- a/src/api/endpoints/annotate/anonymous/get/helpers.py +++ b/src/api/endpoints/annotate/anonymous/get/helpers.py @@ -1,12 +1,9 @@ from typing import Protocol, TypeVar from uuid import UUID -from marshmallow.fields import Bool -from sqlalchemy import Exists, select, exists, ColumnElement, Boolean +from sqlalchemy import select, exists, ColumnElement from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.mixins import AnonymousSessionMixin, URLDependentMixin -from 
src.db.models.templates_.base import Base class AnonymousURLModelProtocol( @@ -17,7 +14,10 @@ class AnonymousURLModelProtocol( AnonModel = TypeVar("AnonModel", bound=AnonymousURLModelProtocol) -def not_exists_anon_annotation(session_id: UUID, anon_model: AnonModel) -> ColumnElement[bool]: +def not_exists_anon_annotation( + session_id: UUID, + anon_model: AnonModel +) -> ColumnElement[bool]: return ~exists( select(anon_model.url_id) .where( diff --git a/src/api/endpoints/annotate/anonymous/get/query.py b/src/api/endpoints/annotate/anonymous/get/query.py index 684df2f5..c53726e1 100644 --- a/src/api/endpoints/annotate/anonymous/get/query.py +++ b/src/api/endpoints/annotate/anonymous/get/query.py @@ -1,10 +1,14 @@ from uuid import UUID +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result from src.api.endpoints.annotate._shared.queries import helper +from src.api.endpoints.annotate._shared.queries.helper import add_common_where_conditions, add_load_options, \ + common_sorts from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse +from src.api.endpoints.annotate.all.get.queries.features.followed_by_any_user import get_followed_by_any_user_feature from src.api.endpoints.annotate.anonymous.get.helpers import not_exists_anon_annotation from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon @@ -25,32 +29,51 @@ def __init__( self.session_id = session_id async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationResponse: - query = helper.get_select() + base_cte = select( + URL.id, + get_followed_by_any_user_feature() + ).cte("base") + + query = select( + URL, + base_cte.c.followed_by_any_user, + ).join( + base_cte, + base_cte.c.id == URL.id + ) + query = 
helper.add_joins(query) + + anon_models = [ + AnnotationURLTypeAnon, + AnnotationRecordTypeAnon, + AnnotationLocationAnon, + AnnotationAgencyAnon + ] # Add anonymous annotation-specific conditions. query = ( query .where( # Must not have been previously annotated by user - not_exists_anon_annotation( - session_id=self.session_id, - anon_model=AnnotationURLTypeAnon - ), - not_exists_anon_annotation( - session_id=self.session_id, - anon_model=AnnotationRecordTypeAnon - ), - not_exists_anon_annotation( - session_id=self.session_id, - anon_model=AnnotationLocationAnon - ), - not_exists_anon_annotation( - session_id=self.session_id, - anon_model=AnnotationAgencyAnon - ) + *[ + not_exists_anon_annotation( + session_id=self.session_id, + anon_model=anon_model + ) + for anon_model in anon_models + ] + ) + ) + query = add_common_where_conditions(query) + query = add_load_options(query) + query = ( + # Sorting Priority + query.order_by( + *common_sorts(base_cte) ) + # Limit to 1 result + .limit(1) ) - query = helper.conclude(query) raw_results = (await session.execute(query)).unique() url: URL | None = raw_results.scalars().one_or_none() diff --git a/src/db/models/impl/__init__.py b/src/db/models/impl/__init__.py index e69de29b..9e679b72 100644 --- a/src/db/models/impl/__init__.py +++ b/src/db/models/impl/__init__.py @@ -0,0 +1,3 @@ + +from .link.location_batch.sqlalchemy import LinkLocationBatch +from .link.batch_url.sqlalchemy import LinkBatchURL \ No newline at end of file diff --git a/src/db/models/views/url_anno_count.py b/src/db/models/views/url_anno_count.py index f3909b39..139b0bac 100644 --- a/src/db/models/views/url_anno_count.py +++ b/src/db/models/views/url_anno_count.py @@ -117,4 +117,8 @@ class URLAnnotationCount( user_location_count = Column(Integer, nullable=False) user_record_type_count = Column(Integer, nullable=False) user_url_type_count = Column(Integer, nullable=False) - total_anno_count = Column(Integer, nullable=False) \ No newline at end of file + 
anon_agency_count = Column(Integer, nullable=False) + anon_location_count = Column(Integer, nullable=False) + anon_record_type_count = Column(Integer, nullable=False) + anon_url_type_count = Column(Integer, nullable=False) + total_anno_count = Column(Integer, nullable=False) diff --git a/tests/automated/integration/api/annotate/all/test_sorting.py b/tests/automated/integration/api/annotate/all/test_sorting.py index a1c59813..1a81dc89 100644 --- a/tests/automated/integration/api/annotate/all/test_sorting.py +++ b/tests/automated/integration/api/annotate/all/test_sorting.py @@ -1,7 +1,14 @@ import pytest +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch from src.db.models.impl.url.core.enums import URLSource +from tests.automated.integration.conftest import MOCK_USER_ID from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review from tests.helpers.setup.final_review.model import FinalReviewSetupInfo @@ -9,7 +16,9 @@ @pytest.mark.asyncio async def test_annotate_sorting( api_test_helper: APITestHelper, - + test_batch_id: int, + pittsburgh_locality: LocalityCreationInfo, + allegheny_county: CountyCreationInfo, ): """ Test that annotations are prioritized in the following order: @@ -18,6 +27,7 @@ async def test_annotate_sorting( - Then prioritize by URL ID ascending (e.g. 
least recently created) """ ath = api_test_helper + dbc: AsyncDatabaseClient = ath.adb_client() # First URL created should be prioritized in absence of any other factors setup_info_first_annotation: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( @@ -46,3 +56,58 @@ async def test_annotate_sorting( get_response_3 = await ath.request_validator.get_next_url_for_all_annotations() assert get_response_3.next_annotation is not None assert get_response_3.next_annotation.url_info.url_id == setup_info_manual_submission.url_mapping.url_id + + # URL with followed_by_any_user should take precedence over manual submissions + + ## Start by adding a new URL + setup_info_followed_by_any_user: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=False + ) + ## Add a link between that URL's batch and a location + link_batch_location = LinkLocationBatch( + batch_id=setup_info_followed_by_any_user.batch_id, + location_id=pittsburgh_locality.location_id + ) + await dbc.add(link_batch_location) + ## Add a link between that location and a user + link_location_user_follow = LinkLocationUserFollow( + location_id=pittsburgh_locality.location_id, + user_id=MOCK_USER_ID + 1 # To ensure it's not the same user we'll be using later on. 
+ ) + await dbc.add(link_location_user_follow) + + # Run get_next_url_for_all_annotations + get_response_4 = await ath.request_validator.get_next_url_for_all_annotations() + # Assert that the URL with followed_by_any_user is returned + assert get_response_4.next_annotation is not None + assert get_response_4.next_annotation.url_info.url_id == setup_info_followed_by_any_user.url_mapping.url_id + + # URL whose associated location is followed by this specific user + # should take precedence over URL whose associated location + # is followed by any user + + ## Start by adding a new URL + setup_info_followed_by_annotating_user: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=False + ) + + ## Add a link between that URL's batch and a location + link_batch_location = LinkLocationBatch( + batch_id=setup_info_followed_by_annotating_user.batch_id, + location_id=allegheny_county.location_id + ) + await dbc.add(link_batch_location) + ## Add a link between that location and the mock user + link_location_user_follow = LinkLocationUserFollow( + location_id=allegheny_county.location_id, + user_id=MOCK_USER_ID + ) + await dbc.add(link_location_user_follow) + + get_response_5 = await ath.request_validator.get_next_url_for_all_annotations() + # Assert that the URL followed by the annotating user is returned + assert get_response_5.next_annotation is not None + assert get_response_5.next_annotation.url_info.url_id == setup_info_followed_by_annotating_user.url_mapping.url_id + + From 82e87e871d466341f2dd55a84795667d3a507d15 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Fri, 26 Dec 2025 16:02:47 -0500 Subject: [PATCH 16/24] Update permissions --- src/api/endpoints/agencies/routes.py | 8 ++++++++ src/api/endpoints/annotate/routes.py | 4 ++-- src/api/endpoints/batch/routes.py | 14 +++++++------- src/api/endpoints/collector/routes.py | 18 +++++++++--------- src/api/endpoints/data_source/routes.py | 5 +++++
src/api/endpoints/locations/routes.py | 3 +++ src/api/endpoints/meta_url/routes.py | 5 +++++ src/api/endpoints/metrics/routes.py | 16 ++++++++-------- src/api/endpoints/proposals/routes.py | 8 ++++---- src/api/endpoints/root.py | 4 ++-- src/api/endpoints/search/routes.py | 6 +++--- src/api/endpoints/submit/routes.py | 4 ++-- src/api/endpoints/task/routes.py | 8 ++++---- src/api/endpoints/url/routes.py | 5 +++-- src/security/manager.py | 2 +- tests/automated/integration/conftest.py | 4 ++-- .../security_manager/test_security_manager.py | 4 ++-- 17 files changed, 70 insertions(+), 48 deletions(-) diff --git a/src/api/endpoints/agencies/routes.py b/src/api/endpoints/agencies/routes.py index b0a756aa..bfbf456f 100644 --- a/src/api/endpoints/agencies/routes.py +++ b/src/api/endpoints/agencies/routes.py @@ -16,6 +16,8 @@ from src.api.endpoints.agencies.root.post.response import AgencyPostResponse from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore +from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_admin_access_info agencies_router = APIRouter(prefix="/agencies", tags=["Agencies"]) @@ -34,7 +36,9 @@ async def get_agencies( @agencies_router.post("") async def create_agency( request: AgencyPostRequest, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), + ) -> AgencyPostResponse: return await async_core.adb_client.run_query_builder( AddAgencyQueryBuilder(request=request) @@ -45,6 +49,7 @@ async def delete_agency( agency_id: int = Path( description="Agency ID to delete" ), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await async_core.adb_client.run_query_builder( @@ -58,6 +63,7 @@ async def update_agency( agency_id: int = Path( description="Agency ID to update" ), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore 
= Depends(get_async_core), ) -> MessageResponse: await async_core.adb_client.run_query_builder( @@ -84,6 +90,7 @@ async def add_location_to_agency( location_id: int = Path( description="Location ID to add" ), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await async_core.adb_client.run_query_builder( @@ -99,6 +106,7 @@ async def remove_location_from_agency( location_id: int = Path( description="Location ID to remove" ), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await async_core.adb_client.run_query_builder( diff --git a/src/api/endpoints/annotate/routes.py b/src/api/endpoints/annotate/routes.py index 945de945..0af2afcb 100644 --- a/src/api/endpoints/annotate/routes.py +++ b/src/api/endpoints/annotate/routes.py @@ -17,7 +17,7 @@ from src.core.core import AsyncCore from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info, get_standard_user_access_info +from src.security.manager import get_admin_access_info, get_standard_user_access_info annotate_router = APIRouter( prefix="/annotate", @@ -136,7 +136,7 @@ async def migrate_annotations_to_user( async def get_agency_suggestions( url_id: int, async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), location_id: int | None = Query(default=None) ) -> AgencyAnnotationResponseOuterInfo: return await async_core.adb_client.run_query_builder( diff --git a/src/api/endpoints/batch/routes.py b/src/api/endpoints/batch/routes.py index 87839fb7..5cc5265c 100644 --- a/src/api/endpoints/batch/routes.py +++ b/src/api/endpoints/batch/routes.py @@ -12,7 +12,7 @@ from src.core.core import AsyncCore from 
src.db.models.views.batch_url_status.enums import BatchURLStatusEnum from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info batch_router = APIRouter( prefix="/batch", @@ -36,7 +36,7 @@ async def get_batch_status( default=1 ), core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetBatchSummariesResponse: """ Get the status of recent batches @@ -52,7 +52,7 @@ async def get_batch_status( async def get_batch_info( batch_id: int = Path(description="The batch id"), core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> BatchSummary: return await core.get_batch_info(batch_id) @@ -64,7 +64,7 @@ async def get_urls_by_batch( default=1 ), core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetURLsByBatchResponse: return await core.get_urls_by_batch(batch_id, page=page) @@ -76,7 +76,7 @@ async def get_duplicates_by_batch( default=1 ), core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetDuplicatesByBatchResponse: return await core.get_duplicate_urls_by_batch(batch_id, page=page) @@ -84,7 +84,7 @@ async def get_duplicates_by_batch( async def get_batch_logs( batch_id: int = Path(description="The batch id"), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetBatchLogsResponse: """ Retrieve the logs for a recent batch. 
@@ -96,6 +96,6 @@ async def get_batch_logs( async def abort_batch( batch_id: int = Path(description="The batch id"), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> MessageResponse: return await async_core.abort_batch(batch_id) \ No newline at end of file diff --git a/src/api/endpoints/collector/routes.py b/src/api/endpoints/collector/routes.py index 4818dc63..0ab89261 100644 --- a/src/api/endpoints/collector/routes.py +++ b/src/api/endpoints/collector/routes.py @@ -10,7 +10,7 @@ from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.collectors.enums import CollectorType from src.core.core import AsyncCore -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo from src.collectors.impl.ckan.dtos.input import CKANInputDTO from src.collectors.impl.muckrock.collectors.all_foia.dto import MuckrockAllFOIARequestsCollectorInputDTO @@ -27,7 +27,7 @@ async def start_example_collector( dto: ExampleInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the example collector @@ -42,7 +42,7 @@ async def start_example_collector( async def start_ckan_collector( dto: CKANInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the ckan collector @@ -57,7 +57,7 @@ async def start_ckan_collector( async def start_common_crawler_collector( dto: CommonCrawlerInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start 
the common crawler collector @@ -72,7 +72,7 @@ async def start_common_crawler_collector( async def start_auto_googler_collector( dto: AutoGooglerInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the auto googler collector @@ -87,7 +87,7 @@ async def start_auto_googler_collector( async def start_muckrock_collector( dto: MuckrockSimpleSearchCollectorInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the muckrock collector @@ -102,7 +102,7 @@ async def start_muckrock_collector( async def start_muckrock_county_collector( dto: MuckrockCountySearchCollectorInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the muckrock county level collector @@ -117,7 +117,7 @@ async def start_muckrock_county_collector( async def start_muckrock_all_foia_collector( dto: MuckrockAllFOIARequestsCollectorInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the muckrock collector for all FOIA requests @@ -132,7 +132,7 @@ async def start_muckrock_all_foia_collector( async def upload_manual_collector( dto: ManualBatchInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> ManualBatchResponseDTO: """ Uploads a manual "collector" with existing data diff --git a/src/api/endpoints/data_source/routes.py b/src/api/endpoints/data_source/routes.py index 25787b85..a657ac18 100644 --- 
a/src/api/endpoints/data_source/routes.py +++ b/src/api/endpoints/data_source/routes.py @@ -13,6 +13,8 @@ from src.api.endpoints.data_source.by_id.put.request import DataSourcePutRequest from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore +from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_admin_access_info data_sources_router = APIRouter( prefix="/data-sources", @@ -45,6 +47,7 @@ async def get_data_source_by_id( async def update_data_source( url_id: int , request: DataSourcePutRequest, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await check_is_data_source_url(url_id=url_id, adb_client=async_core.adb_client) @@ -70,6 +73,7 @@ async def get_data_source_agencies( async def add_agency_to_data_source( url_id: int, agency_id: int, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await add_data_source_agency_link( @@ -83,6 +87,7 @@ async def add_agency_to_data_source( async def remove_agency_from_data_source( url_id: int, agency_id: int, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await delete_data_source_agency_link( diff --git a/src/api/endpoints/locations/routes.py b/src/api/endpoints/locations/routes.py index 4a0ef096..c86f66b5 100644 --- a/src/api/endpoints/locations/routes.py +++ b/src/api/endpoints/locations/routes.py @@ -5,6 +5,8 @@ from src.api.endpoints.locations.post.request import AddLocationRequestModel from src.api.endpoints.locations.post.response import AddLocationResponseModel from src.core.core import AsyncCore +from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_admin_access_info location_url_router = APIRouter( prefix="/locations", @@ -15,6 +17,7 @@ 
@location_url_router.post("") async def create_location( request: AddLocationRequestModel, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> AddLocationResponseModel: return await async_core.adb_client.run_query_builder( diff --git a/src/api/endpoints/meta_url/routes.py b/src/api/endpoints/meta_url/routes.py index 82a36756..790fd519 100644 --- a/src/api/endpoints/meta_url/routes.py +++ b/src/api/endpoints/meta_url/routes.py @@ -12,6 +12,8 @@ from src.api.endpoints.meta_url.by_id.put.request import UpdateMetaURLRequest from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore +from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_admin_access_info meta_urls_router = APIRouter( prefix="/meta-urls", @@ -35,6 +37,7 @@ async def get_meta_urls( async def update_meta_url( url_id: int, request: UpdateMetaURLRequest, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await check_is_meta_url(url_id=url_id, adb_client=async_core.adb_client) @@ -61,6 +64,7 @@ async def get_meta_url_agencies( async def add_agency_to_meta_url( url_id: int, agency_id: int, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await add_meta_url_agency_link( @@ -74,6 +78,7 @@ async def add_agency_to_meta_url( async def remove_agency_from_meta_url( url_id: int, agency_id: int, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await delete_meta_url_agency_link( diff --git a/src/api/endpoints/metrics/routes.py b/src/api/endpoints/metrics/routes.py index 59fa5906..06c09de3 100644 --- a/src/api/endpoints/metrics/routes.py +++ b/src/api/endpoints/metrics/routes.py @@ -10,7 +10,7 @@ from 
src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseDTO from src.api.endpoints.metrics.dtos.get.urls.breakdown.submitted import GetMetricsURLsBreakdownSubmittedResponseDTO from src.core.core import AsyncCore -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo metrics_router = APIRouter( @@ -22,14 +22,14 @@ @metrics_router.get("/batches/aggregated") async def get_batches_aggregated_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsBatchesAggregatedResponseDTO: return await core.get_batches_aggregated_metrics() @metrics_router.get("/batches/breakdown") async def get_batches_breakdown_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), page: int = Query( description="The page number", default=1 @@ -40,34 +40,34 @@ async def get_batches_breakdown_metrics( @metrics_router.get("/urls/aggregate") async def get_urls_aggregated_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsURLsAggregatedResponseDTO: return await core.get_urls_aggregated_metrics() @metrics_router.get("/urls/aggregate/pending") async def get_urls_aggregated_pending_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsURLsAggregatedPendingResponseDTO: return await core.get_urls_aggregated_pending_metrics() @metrics_router.get("/urls/breakdown/submitted") async def get_urls_breakdown_submitted_metrics( core: AsyncCore = Depends(get_async_core), - access_info: 
AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsURLsBreakdownSubmittedResponseDTO: return await core.get_urls_breakdown_submitted_metrics() @metrics_router.get("/urls/breakdown/pending") async def get_urls_breakdown_pending_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsURLsBreakdownPendingResponseDTO: return await core.get_urls_breakdown_pending_metrics() @metrics_router.get("/backlog") async def get_backlog_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsBacklogResponseDTO: return await core.get_backlog_metrics() \ No newline at end of file diff --git a/src/api/endpoints/proposals/routes.py b/src/api/endpoints/proposals/routes.py index 8371c604..83d11f79 100644 --- a/src/api/endpoints/proposals/routes.py +++ b/src/api/endpoints/proposals/routes.py @@ -10,14 +10,14 @@ from src.api.endpoints.proposals.agencies.reject.response import ProposalAgencyRejectResponse from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info proposal_router = APIRouter(prefix="/proposal", tags=["Pending"]) @proposal_router.get("/agencies") async def get_pending_agencies( async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> ProposalAgencyGetOuterResponse: return await async_core.adb_client.run_query_builder( ProposalAgencyGetQueryBuilder(), @@ -29,7 +29,7 @@ async def approve_proposed_agency( proposed_agency_id: int = Path( description="Proposed agency ID to approve" ), - access_info: AccessInfo = 
Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> ProposalAgencyApproveResponse: return await async_core.adb_client.run_query_builder( ProposalAgencyApproveQueryBuilder( @@ -45,7 +45,7 @@ async def reject_proposed_agency( proposed_agency_id: int = Path( description="Proposed agency ID to reject" ), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> ProposalAgencyRejectResponse: return await async_core.adb_client.run_query_builder( ProposalAgencyRejectQueryBuilder( diff --git a/src/api/endpoints/root.py b/src/api/endpoints/root.py index 03b05ed4..044c0a5f 100644 --- a/src/api/endpoints/root.py +++ b/src/api/endpoints/root.py @@ -1,6 +1,6 @@ from fastapi import APIRouter, Query, Depends -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo root_router = APIRouter(prefix="", tags=["Root"]) @@ -8,7 +8,7 @@ @root_router.get("/") async def root( test: str = Query(description="A test parameter"), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> dict[str, str]: """ A simple root endpoint for testing and pinging diff --git a/src/api/endpoints/search/routes.py b/src/api/endpoints/search/routes.py index 58b661e8..aa3c730b 100644 --- a/src/api/endpoints/search/routes.py +++ b/src/api/endpoints/search/routes.py @@ -8,7 +8,7 @@ from src.api.endpoints.search.dtos.response import SearchURLResponse from src.core.core import AsyncCore from src.db.models.impl.agency.enums import JurisdictionType -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo search_router = APIRouter(prefix="/search", tags=["Search"]) @@ -17,7 +17,7 @@ @search_router.get("/url") async def search_url( url: str = 
Query(description="The URL to search for"), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> SearchURLResponse: """ @@ -44,7 +44,7 @@ async def search_agency( description="The page to search for", default=1 ), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> list[AgencySearchResponse]: if query is None and location_id is None and jurisdiction_type is None: diff --git a/src/api/endpoints/submit/routes.py b/src/api/endpoints/submit/routes.py index dec7e2aa..b7e2344c 100644 --- a/src/api/endpoints/submit/routes.py +++ b/src/api/endpoints/submit/routes.py @@ -14,7 +14,7 @@ from src.api.endpoints.submit.url.queries.core import SubmitURLQueryBuilder from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info, get_standard_user_access_info +from src.security.manager import get_admin_access_info, get_standard_user_access_info submit_router = APIRouter(prefix="/submit", tags=["Submit"]) @@ -23,7 +23,7 @@ ) async def submit_url( request: URLSubmissionRequest, - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_standard_user_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> URLSubmissionResponse: return await async_core.adb_client.run_query_builder( diff --git a/src/api/endpoints/task/routes.py b/src/api/endpoints/task/routes.py index 23f52999..3bb039b7 100644 --- a/src/api/endpoints/task/routes.py +++ b/src/api/endpoints/task/routes.py @@ -9,7 +9,7 @@ from src.db.enums import TaskType from src.core.core import AsyncCore from src.core.enums import BatchStatus -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import 
AccessInfo task_router = APIRouter( @@ -34,7 +34,7 @@ async def get_tasks( default=None ), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetTasksResponse: return await async_core.get_tasks( page=page, @@ -45,7 +45,7 @@ async def get_tasks( @task_router.get("/status") async def get_task_status( async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetTaskStatusResponseInfo: return await async_core.get_current_task_status() @@ -53,7 +53,7 @@ async def get_task_status( async def get_task_info( task_id: int = Path(description="The task id"), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> TaskInfo: return await async_core.get_task_info(task_id) diff --git a/src/api/endpoints/url/routes.py b/src/api/endpoints/url/routes.py index 7d184e6e..77a0a749 100644 --- a/src/api/endpoints/url/routes.py +++ b/src/api/endpoints/url/routes.py @@ -6,7 +6,7 @@ from src.api.endpoints.url.get.dto import GetURLsResponseInfo from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo url_router = APIRouter( @@ -26,7 +26,7 @@ async def get_urls( default=False ), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetURLsResponseInfo: result = await async_core.get_urls(page=page, errors=errors) return result @@ -50,6 +50,7 @@ async def get_url_screenshot( async def delete_url( url_id: int, async_core: AsyncCore = 
Depends(get_async_core), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> MessageResponse: await async_core.adb_client.run_query_builder( DeleteURLQueryBuilder(url_id=url_id) diff --git a/src/security/manager.py b/src/security/manager.py index abeade07..8ec7996a 100644 --- a/src/security/manager.py +++ b/src/security/manager.py @@ -64,7 +64,7 @@ def check_access( oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") -def get_access_info( +def get_admin_access_info( token: Annotated[str, Depends(oauth2_scheme)] ) -> AccessInfo: return SecurityManager().check_access(token, Permissions.SOURCE_COLLECTOR) diff --git a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index 22537d20..64f83459 100644 --- a/tests/automated/integration/conftest.py +++ b/tests/automated/integration/conftest.py @@ -19,7 +19,7 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.security.dtos.access_info import AccessInfo from src.security.enums import Permissions -from src.security.manager import get_access_info, get_standard_user_access_info +from src.security.manager import get_admin_access_info, get_standard_user_access_info from tests.automated.integration.api._helpers.RequestValidator import RequestValidator from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.core import DBDataCreator @@ -134,7 +134,7 @@ def override_access_info() -> AccessInfo: @pytest.fixture(scope="session") def client(disable_task_flags) -> Generator[TestClient, None, None]: with TestClient(app) as c: - app.dependency_overrides[get_access_info] = override_access_info + app.dependency_overrides[get_admin_access_info] = override_access_info app.dependency_overrides[get_standard_user_access_info] = override_access_info async_core: AsyncCore = c.app.state.async_core diff --git a/tests/automated/unit/security_manager/test_security_manager.py b/tests/automated/unit/security_manager/test_security_manager.py index 
66399d7f..63a6ea01 100644 --- a/tests/automated/unit/security_manager/test_security_manager.py +++ b/tests/automated/unit/security_manager/test_security_manager.py @@ -4,7 +4,7 @@ from fastapi import HTTPException from jwt import InvalidTokenError -from src.security.manager import SecurityManager, get_access_info +from src.security.manager import SecurityManager, get_admin_access_info from src.security.dtos.access_info import AccessInfo from src.security.enums import Permissions @@ -64,6 +64,6 @@ def test_check_access_failure(mock_get_secret_key, mock_jwt_decode): def test_get_access_info(mock_get_secret_key, mock_jwt_decode): - access_info = get_access_info(token=VALID_TOKEN) + access_info = get_admin_access_info(token=VALID_TOKEN) assert access_info.user_id == 1 assert Permissions.SOURCE_COLLECTOR in access_info.permissions From f26b1ead56f6fbbfc9112838a475f7eb2710854a Mon Sep 17 00:00:00 2001 From: Max Chis Date: Mon, 29 Dec 2025 14:07:10 -0500 Subject: [PATCH 17/24] Clean up and refactor --- .../pdap/_templates/request_builder.py | 21 +++++++++++++++-- src/external/pdap/client.py | 23 ------------------- src/external/pdap/impl/sync/follows/core.py | 2 +- .../api/proposals/test_agencies.py | 1 + .../external/pdap/test_check_for_duplicate.py | 9 -------- 5 files changed, 21 insertions(+), 35 deletions(-) delete mode 100644 tests/manual/external/pdap/test_check_for_duplicate.py diff --git a/src/external/pdap/_templates/request_builder.py b/src/external/pdap/_templates/request_builder.py index 887e2cfd..d944efdf 100644 --- a/src/external/pdap/_templates/request_builder.py +++ b/src/external/pdap/_templates/request_builder.py @@ -38,10 +38,27 @@ async def post( raise Exception(f"Failed to make request to PDAP: {response_info.data}") return response_info.data + async def post_v2( + self, + url: str, + request_model: BaseModel, + return_model_type: type[T] + ) -> T: + request_info = RequestInfo( + type_=RequestType.POST, + url=url, + 
json_=request_model.model_dump(mode='json'), + headers=await self.access_manager.jwt_header() + ) + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + if response_info.status_code != HTTPStatus.OK: + raise Exception(f"Failed to make request to PDAP: {response_info.data}") + return return_model_type(**response_info.data) + async def get( self, url: str, - model: type[T] + return_model_type: type[T] ) -> T: request_info = RequestInfo( type_=RequestType.GET, @@ -51,7 +68,7 @@ async def get( response_info: ResponseInfo = await self.access_manager.make_request(request_info) if response_info.status_code != HTTPStatus.OK: raise Exception(f"Failed to make request to PDAP: {response_info.data}") - return model(**response_info.data) + return return_model_type(**response_info.data) @abstractmethod async def inner_logic(self) -> Any: diff --git a/src/external/pdap/client.py b/src/external/pdap/client.py index 38c67e08..d3cb1209 100644 --- a/src/external/pdap/client.py +++ b/src/external/pdap/client.py @@ -22,26 +22,3 @@ async def run_request_builder( request_builder: PDAPRequestBuilderBase ) -> Any: return await request_builder.run(self.access_manager) - - async def is_url_duplicate( - self, - url_to_check: str - ) -> bool: - """ - Check if a URL is unique. 
Returns duplicate info otherwise - """ - url: str = f"{self.access_manager.data_sources_url}/v2/check/unique-url" - - request_info = RequestInfo( - type_=RequestType.GET, - url=url, - params={ - "url": url_to_check - } - ) - response_info: ResponseInfo = await self.access_manager.make_request(request_info) - duplicates: list[UniqueURLDuplicateInfo] = [ - UniqueURLDuplicateInfo(**entry) for entry in response_info.data["duplicates"] - ] - is_duplicate: bool = (len(duplicates) != 0) - return is_duplicate diff --git a/src/external/pdap/impl/sync/follows/core.py b/src/external/pdap/impl/sync/follows/core.py index 707ac8c9..8b442e56 100644 --- a/src/external/pdap/impl/sync/follows/core.py +++ b/src/external/pdap/impl/sync/follows/core.py @@ -8,6 +8,6 @@ async def inner_logic(self) -> list[SyncFollowGetInnerResponse]: url: str = self.build_url("v3/sync/follows") response: SyncFollowGetOuterResponse = await self.get( url=url, - model=SyncFollowGetOuterResponse + return_model_type=SyncFollowGetOuterResponse ) return response.follows diff --git a/tests/automated/integration/api/proposals/test_agencies.py b/tests/automated/integration/api/proposals/test_agencies.py index 70a97118..31037f12 100644 --- a/tests/automated/integration/api/proposals/test_agencies.py +++ b/tests/automated/integration/api/proposals/test_agencies.py @@ -85,6 +85,7 @@ async def test_agencies( agencies: list[Agency] = await adb_client.get_all(Agency) assert len(agencies) == 1 agency = agencies[0] + assert agency.id == agency_id assert agency.name == request.name assert agency.agency_type == request.agency_type assert agency.jurisdiction_type == request.jurisdiction_type diff --git a/tests/manual/external/pdap/test_check_for_duplicate.py b/tests/manual/external/pdap/test_check_for_duplicate.py deleted file mode 100644 index 25a8bc52..00000000 --- a/tests/manual/external/pdap/test_check_for_duplicate.py +++ /dev/null @@ -1,9 +0,0 @@ -import pytest - - -@pytest.mark.asyncio -async def 
test_check_for_duplicate(pdap_client): - - response = await pdap_client.is_url_duplicate(url_to_check="example.com") - - print(response) From 78923c40536395bff2f7705003d0439d7a754b68 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Mon, 29 Dec 2025 16:10:46 -0500 Subject: [PATCH 18/24] Add agency proposal edit logic --- .../agencies/{approve => by_id}/__init__.py | 0 .../{get => by_id/approve}/__init__.py | 0 .../agencies/{ => by_id}/approve/query.py | 2 +- .../agencies/{ => by_id}/approve/response.py | 0 .../{reject => by_id/locations}/__init__.py | 0 .../by_id/locations/delete/__init__.py | 0 .../agencies/by_id/locations/delete/query.py | 30 +++++ .../agencies/by_id/locations/get/__init__.py | 0 .../agencies/by_id/locations/get/query.py | 41 +++++++ .../agencies/by_id/locations/get/response.py | 7 ++ .../agencies/by_id/locations/post/__init__.py | 0 .../agencies/by_id/locations/post/query.py | 23 ++++ .../proposals/agencies/by_id/put/__init__.py | 0 .../proposals/agencies/by_id/put/query.py | 45 ++++++++ .../proposals/agencies/by_id/put/request.py | 10 ++ .../agencies/by_id/reject/__init__.py | 0 .../agencies/{ => by_id}/reject/query.py | 4 +- .../agencies/{ => by_id}/reject/request.py | 0 .../agencies/{ => by_id}/reject/response.py | 0 .../proposals/agencies/root/__init__.py | 0 .../proposals/agencies/root/get/__init__.py | 0 .../agencies/{ => root}/get/query.py | 2 +- .../agencies/{ => root}/get/response.py | 0 src/api/endpoints/proposals/routes.py | 78 +++++++++++-- .../api/proposals/test_agencies.py | 103 ++++++++++++++++-- 25 files changed, 324 insertions(+), 21 deletions(-) rename src/api/endpoints/proposals/agencies/{approve => by_id}/__init__.py (100%) rename src/api/endpoints/proposals/agencies/{get => by_id/approve}/__init__.py (100%) rename src/api/endpoints/proposals/agencies/{ => by_id}/approve/query.py (98%) rename src/api/endpoints/proposals/agencies/{ => by_id}/approve/response.py (100%) rename src/api/endpoints/proposals/agencies/{reject => 
by_id/locations}/__init__.py (100%) create mode 100644 src/api/endpoints/proposals/agencies/by_id/locations/delete/__init__.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/locations/delete/query.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/locations/get/__init__.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/locations/get/query.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/locations/get/response.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/locations/post/__init__.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/locations/post/query.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/put/__init__.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/put/query.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/put/request.py create mode 100644 src/api/endpoints/proposals/agencies/by_id/reject/__init__.py rename src/api/endpoints/proposals/agencies/{ => by_id}/reject/query.py (93%) rename src/api/endpoints/proposals/agencies/{ => by_id}/reject/request.py (100%) rename src/api/endpoints/proposals/agencies/{ => by_id}/reject/response.py (100%) create mode 100644 src/api/endpoints/proposals/agencies/root/__init__.py create mode 100644 src/api/endpoints/proposals/agencies/root/get/__init__.py rename src/api/endpoints/proposals/agencies/{ => root}/get/query.py (94%) rename src/api/endpoints/proposals/agencies/{ => root}/get/response.py (100%) diff --git a/src/api/endpoints/proposals/agencies/approve/__init__.py b/src/api/endpoints/proposals/agencies/by_id/__init__.py similarity index 100% rename from src/api/endpoints/proposals/agencies/approve/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/__init__.py diff --git a/src/api/endpoints/proposals/agencies/get/__init__.py b/src/api/endpoints/proposals/agencies/by_id/approve/__init__.py similarity index 100% rename from 
src/api/endpoints/proposals/agencies/get/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/approve/__init__.py diff --git a/src/api/endpoints/proposals/agencies/approve/query.py b/src/api/endpoints/proposals/agencies/by_id/approve/query.py similarity index 98% rename from src/api/endpoints/proposals/agencies/approve/query.py rename to src/api/endpoints/proposals/agencies/by_id/approve/query.py index 3c08954e..07dd21ff 100644 --- a/src/api/endpoints/proposals/agencies/approve/query.py +++ b/src/api/endpoints/proposals/agencies/by_id/approve/query.py @@ -3,7 +3,7 @@ from sqlalchemy.exc import NoResultFound from sqlalchemy.ext.asyncio import AsyncSession -from src.api.endpoints.proposals.agencies.approve.response import ProposalAgencyApproveResponse +from src.api.endpoints.proposals.agencies.by_id.approve.response import ProposalAgencyApproveResponse from src.db.models.impl.agency.enums import JurisdictionType, AgencyType from src.db.models.impl.agency.sqlalchemy import Agency from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation diff --git a/src/api/endpoints/proposals/agencies/approve/response.py b/src/api/endpoints/proposals/agencies/by_id/approve/response.py similarity index 100% rename from src/api/endpoints/proposals/agencies/approve/response.py rename to src/api/endpoints/proposals/agencies/by_id/approve/response.py diff --git a/src/api/endpoints/proposals/agencies/reject/__init__.py b/src/api/endpoints/proposals/agencies/by_id/locations/__init__.py similarity index 100% rename from src/api/endpoints/proposals/agencies/reject/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/locations/__init__.py diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/delete/__init__.py b/src/api/endpoints/proposals/agencies/by_id/locations/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/delete/query.py 
b/src/api/endpoints/proposals/agencies/by_id/locations/delete/query.py new file mode 100644 index 00000000..1ce236cb --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/locations/delete/query.py @@ -0,0 +1,30 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.queries.base.builder import QueryBuilderBase + + +class DeleteProposalAgencyLocationQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + location_id: int, + ): + super().__init__() + self.agency_id = agency_id + self.location_id = location_id + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(ProposalLinkAgencyLocation) + .where( + (ProposalLinkAgencyLocation.proposal_agency_id == self.agency_id) + & (ProposalLinkAgencyLocation.location_id == self.location_id) + ) + ) + + await session.execute(statement) + diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/get/__init__.py b/src/api/endpoints/proposals/agencies/by_id/locations/get/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/get/query.py b/src/api/endpoints/proposals/agencies/by_id/locations/get/query.py new file mode 100644 index 00000000..bc45f8ba --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/locations/get/query.py @@ -0,0 +1,41 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.api.endpoints.proposals.agencies.by_id.locations.get.response import ProposalAgencyGetLocationsOuterResponse +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from 
src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.models.views.location_expanded import LocationExpandedView +from src.db.queries.base.builder import QueryBuilderBase + + +class GetProposalAgencyLocationsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + ): + super().__init__() + self.agency_id = agency_id + + async def run(self, session: AsyncSession) -> ProposalAgencyGetLocationsOuterResponse: + query = ( + select( + ProposalLinkAgencyLocation.location_id, + LocationExpandedView.full_display_name + ) + .where( + ProposalLinkAgencyLocation.proposal_agency_id == self.agency_id + ) + .join( + LocationExpandedView, + LocationExpandedView.id == ProposalLinkAgencyLocation.location_id + ) + ) + + result: Sequence[RowMapping] = await self.sh.mappings(session, query=query) + return ProposalAgencyGetLocationsOuterResponse( + results=[AgencyGetLocationsResponse(**row) for row in result] + ) \ No newline at end of file diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/get/response.py b/src/api/endpoints/proposals/agencies/by_id/locations/get/response.py new file mode 100644 index 00000000..f6175e6d --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/locations/get/response.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse + + +class ProposalAgencyGetLocationsOuterResponse(BaseModel): + results: list[AgencyGetLocationsResponse] \ No newline at end of file diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/post/__init__.py b/src/api/endpoints/proposals/agencies/by_id/locations/post/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/post/query.py b/src/api/endpoints/proposals/agencies/by_id/locations/post/query.py new file mode 100644 index 00000000..439482e5 --- /dev/null +++ 
b/src/api/endpoints/proposals/agencies/by_id/locations/post/query.py @@ -0,0 +1,23 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.queries.base.builder import QueryBuilderBase + + +class AddProposalAgencyLocationQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + location_id: int + ): + super().__init__() + self.agency_id = agency_id + self.location_id = location_id + + async def run(self, session: AsyncSession) -> None: + lal = ProposalLinkAgencyLocation( + proposal_agency_id=self.agency_id, + location_id=self.location_id, + ) + session.add(lal) \ No newline at end of file diff --git a/src/api/endpoints/proposals/agencies/by_id/put/__init__.py b/src/api/endpoints/proposals/agencies/by_id/put/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/proposals/agencies/by_id/put/query.py b/src/api/endpoints/proposals/agencies/by_id/put/query.py new file mode 100644 index 00000000..996cd909 --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/put/query.py @@ -0,0 +1,45 @@ +from fastapi import HTTPException +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.proposals.agencies.by_id.put.request import ProposalAgencyPutRequest +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.queries.base.builder import QueryBuilderBase + + +class UpdateProposalAgencyQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + request: ProposalAgencyPutRequest, + ): + super().__init__() + self.agency_id = agency_id + self.request = request + + async def run(self, session: AsyncSession) -> None: + + query = ( + select( + ProposalAgency + ) + .where( + ProposalAgency.id == self.agency_id + ) + ) + + agency: ProposalAgency | None = await self.sh.one_or_none(session, query=query) + if not agency: + raise 
HTTPException(status_code=400, detail="Proposed Agency not found") + + if self.request.name is not None: + agency.name = self.request.name + if self.request.type is not None: + agency.agency_type = self.request.type + if self.request.jurisdiction_type is not None: + agency.jurisdiction_type = self.request.jurisdiction_type + + + + diff --git a/src/api/endpoints/proposals/agencies/by_id/put/request.py b/src/api/endpoints/proposals/agencies/by_id/put/request.py new file mode 100644 index 00000000..4f49f17e --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/put/request.py @@ -0,0 +1,10 @@ +from src.api.shared.models.request_base import RequestBase +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType + + +class ProposalAgencyPutRequest(RequestBase): + name: str | None = None + type: AgencyType | None = None + jurisdiction_type: JurisdictionType | None = None + + diff --git a/src/api/endpoints/proposals/agencies/by_id/reject/__init__.py b/src/api/endpoints/proposals/agencies/by_id/reject/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/proposals/agencies/reject/query.py b/src/api/endpoints/proposals/agencies/by_id/reject/query.py similarity index 93% rename from src/api/endpoints/proposals/agencies/reject/query.py rename to src/api/endpoints/proposals/agencies/by_id/reject/query.py index 0635a58d..e7038b4f 100644 --- a/src/api/endpoints/proposals/agencies/reject/query.py +++ b/src/api/endpoints/proposals/agencies/by_id/reject/query.py @@ -2,8 +2,8 @@ from sqlalchemy import select, RowMapping, update from sqlalchemy.ext.asyncio import AsyncSession -from src.api.endpoints.proposals.agencies.reject.request import ProposalAgencyRejectRequestModel -from src.api.endpoints.proposals.agencies.reject.response import ProposalAgencyRejectResponse +from src.api.endpoints.proposals.agencies.by_id.reject.request import ProposalAgencyRejectRequestModel +from 
src.api.endpoints.proposals.agencies.by_id.reject.response import ProposalAgencyRejectResponse from src.db.models.impl.proposals.agency_.core import ProposalAgency from src.db.models.impl.proposals.agency_.decision_info import ProposalAgencyDecisionInfo from src.db.models.impl.proposals.enums import ProposalStatus diff --git a/src/api/endpoints/proposals/agencies/reject/request.py b/src/api/endpoints/proposals/agencies/by_id/reject/request.py similarity index 100% rename from src/api/endpoints/proposals/agencies/reject/request.py rename to src/api/endpoints/proposals/agencies/by_id/reject/request.py diff --git a/src/api/endpoints/proposals/agencies/reject/response.py b/src/api/endpoints/proposals/agencies/by_id/reject/response.py similarity index 100% rename from src/api/endpoints/proposals/agencies/reject/response.py rename to src/api/endpoints/proposals/agencies/by_id/reject/response.py diff --git a/src/api/endpoints/proposals/agencies/root/__init__.py b/src/api/endpoints/proposals/agencies/root/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/proposals/agencies/root/get/__init__.py b/src/api/endpoints/proposals/agencies/root/get/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/proposals/agencies/get/query.py b/src/api/endpoints/proposals/agencies/root/get/query.py similarity index 94% rename from src/api/endpoints/proposals/agencies/get/query.py rename to src/api/endpoints/proposals/agencies/root/get/query.py index dde61c90..6f4df84d 100644 --- a/src/api/endpoints/proposals/agencies/get/query.py +++ b/src/api/endpoints/proposals/agencies/root/get/query.py @@ -5,7 +5,7 @@ from sqlalchemy.orm import joinedload from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse -from src.api.endpoints.proposals.agencies.get.response import ProposalAgencyGetOuterResponse, ProposalAgencyGetResponse +from src.api.endpoints.proposals.agencies.root.get.response 
import ProposalAgencyGetOuterResponse, ProposalAgencyGetResponse from src.db.models.impl.proposals.agency_.core import ProposalAgency from src.db.models.impl.proposals.enums import ProposalStatus from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/proposals/agencies/get/response.py b/src/api/endpoints/proposals/agencies/root/get/response.py similarity index 100% rename from src/api/endpoints/proposals/agencies/get/response.py rename to src/api/endpoints/proposals/agencies/root/get/response.py diff --git a/src/api/endpoints/proposals/routes.py b/src/api/endpoints/proposals/routes.py index 8371c604..147e0501 100644 --- a/src/api/endpoints/proposals/routes.py +++ b/src/api/endpoints/proposals/routes.py @@ -1,13 +1,21 @@ from fastapi import APIRouter, Depends, Path from src.api.dependencies import get_async_core -from src.api.endpoints.proposals.agencies.approve.query import ProposalAgencyApproveQueryBuilder -from src.api.endpoints.proposals.agencies.approve.response import ProposalAgencyApproveResponse -from src.api.endpoints.proposals.agencies.get.query import ProposalAgencyGetQueryBuilder -from src.api.endpoints.proposals.agencies.get.response import ProposalAgencyGetOuterResponse -from src.api.endpoints.proposals.agencies.reject.query import ProposalAgencyRejectQueryBuilder -from src.api.endpoints.proposals.agencies.reject.request import ProposalAgencyRejectRequestModel -from src.api.endpoints.proposals.agencies.reject.response import ProposalAgencyRejectResponse +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.api.endpoints.proposals.agencies.by_id.approve.query import ProposalAgencyApproveQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.approve.response import ProposalAgencyApproveResponse +from src.api.endpoints.proposals.agencies.by_id.locations.delete.query import DeleteProposalAgencyLocationQueryBuilder +from 
src.api.endpoints.proposals.agencies.by_id.locations.get.query import GetProposalAgencyLocationsQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.locations.get.response import ProposalAgencyGetLocationsOuterResponse +from src.api.endpoints.proposals.agencies.by_id.locations.post.query import AddProposalAgencyLocationQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.put.query import UpdateProposalAgencyQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.put.request import ProposalAgencyPutRequest +from src.api.endpoints.proposals.agencies.root.get.query import ProposalAgencyGetQueryBuilder +from src.api.endpoints.proposals.agencies.root.get.response import ProposalAgencyGetOuterResponse +from src.api.endpoints.proposals.agencies.by_id.reject.query import ProposalAgencyRejectQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.reject.request import ProposalAgencyRejectRequestModel +from src.api.endpoints.proposals.agencies.by_id.reject.response import ProposalAgencyRejectResponse +from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo from src.security.manager import get_access_info @@ -53,4 +61,58 @@ async def reject_proposed_agency( deciding_user_id=access_info.user_id, request_model=request, ) - ) \ No newline at end of file + ) + +@proposal_router.get("/agencies/{proposed_agency_id}/locations") +async def get_agency_locations( + proposed_agency_id: int = Path( + description="Agency ID to get locations for" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> ProposalAgencyGetLocationsOuterResponse: + return await async_core.adb_client.run_query_builder( + GetProposalAgencyLocationsQueryBuilder(agency_id=proposed_agency_id) + ) + +@proposal_router.post("/agencies/{proposed_agency_id}/locations/{location_id}") +async def add_location_to_agency( + proposed_agency_id: int = Path( + description="Agency ID to add 
location to" + ), + location_id: int = Path( + description="Location ID to add" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + AddProposalAgencyLocationQueryBuilder(agency_id=proposed_agency_id, location_id=location_id) + ) + return MessageResponse(message="Location added to agency.") + +@proposal_router.delete("/agencies/{proposed_agency_id}/locations/{location_id}") +async def remove_location_from_agency( + proposed_agency_id: int = Path( + description="Agency ID to remove location from" + ), + location_id: int = Path( + description="Location ID to remove" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + DeleteProposalAgencyLocationQueryBuilder(agency_id=proposed_agency_id, location_id=location_id) + ) + return MessageResponse(message="Location removed from agency.") + +@proposal_router.put("/agencies/{proposed_agency_id}") +async def update_agency( + request: ProposalAgencyPutRequest, + proposed_agency_id: int = Path( + description="Agency ID to update" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + UpdateProposalAgencyQueryBuilder(agency_id=proposed_agency_id, request=request) + ) + return MessageResponse(message="Proposed agency updated.") diff --git a/tests/automated/integration/api/proposals/test_agencies.py b/tests/automated/integration/api/proposals/test_agencies.py index 31037f12..d1a2d2ab 100644 --- a/tests/automated/integration/api/proposals/test_agencies.py +++ b/tests/automated/integration/api/proposals/test_agencies.py @@ -1,12 +1,15 @@ import pytest -from src.api.endpoints.proposals.agencies.approve.response import ProposalAgencyApproveResponse -from src.api.endpoints.proposals.agencies.get.response import ProposalAgencyGetOuterResponse -from src.api.endpoints.proposals.agencies.reject.request import 
ProposalAgencyRejectRequestModel -from src.api.endpoints.proposals.agencies.reject.response import ProposalAgencyRejectResponse +from src.api.endpoints.proposals.agencies.by_id.approve.response import ProposalAgencyApproveResponse +from src.api.endpoints.proposals.agencies.by_id.locations.get.response import ProposalAgencyGetLocationsOuterResponse +from src.api.endpoints.proposals.agencies.by_id.put.request import ProposalAgencyPutRequest +from src.api.endpoints.proposals.agencies.root.get.response import ProposalAgencyGetOuterResponse +from src.api.endpoints.proposals.agencies.by_id.reject.request import ProposalAgencyRejectRequestModel +from src.api.endpoints.proposals.agencies.by_id.reject.response import ProposalAgencyRejectResponse from src.api.endpoints.submit.agency.enums import AgencyProposalRequestStatus from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel from src.api.endpoints.submit.agency.response import SubmitAgencyProposalResponse +from src.api.shared.models.message_response import MessageResponse from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.agency.enums import AgencyType, JurisdictionType from src.db.models.impl.agency.sqlalchemy import Agency @@ -16,13 +19,15 @@ from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo @pytest.mark.asyncio async def test_agencies( api_test_helper: APITestHelper, pittsburgh_locality: LocalityCreationInfo, - allegheny_county: CountyCreationInfo + allegheny_county: CountyCreationInfo, + pennsylvania: USStateCreationInfo ): request = SubmitAgencyRequestModel( name="test_agency", @@ -71,6 +76,78 @@ async def test_agencies( assert [loc.location_id for loc in proposal.locations] == request.location_ids 
assert proposal.created_at is not None + # Edit Endpoint + edit_response: MessageResponse = rv.put_v3( + f"/proposal/agencies/{proposal_id}", + expected_model=MessageResponse, + json=ProposalAgencyPutRequest( + name='Modified Agency', + type=AgencyType.AGGREGATED, + jurisdiction_type=JurisdictionType.COUNTY, + ).model_dump(mode="json") + ) + assert edit_response.message == "Proposed agency updated." + + # Confirm agency proposal is updated + get_response_1p5: ProposalAgencyGetOuterResponse = rv.get_v3( + "/proposal/agencies", + expected_model=ProposalAgencyGetOuterResponse + ) + # Confirm agency is in response + assert len(get_response_1p5.results) == 1 + proposal = get_response_1p5.results[0] + assert proposal.id == proposal_id + assert proposal.name == 'Modified Agency' + assert proposal.proposing_user_id == MOCK_USER_ID + assert proposal.agency_type == AgencyType.AGGREGATED + assert proposal.jurisdiction_type == JurisdictionType.COUNTY + assert [loc.location_id for loc in proposal.locations] == request.location_ids + assert proposal.created_at is not None + + + # Get locations for endpoint + get_locations_response: ProposalAgencyGetLocationsOuterResponse = rv.get_v3( + f"/proposal/agencies/{proposal_id}/locations", + expected_model=ProposalAgencyGetLocationsOuterResponse + ) + assert len(get_locations_response.results) == 2 + # Check Location IDs match + assert {loc.location_id for loc in get_locations_response.results} == { + allegheny_county.location_id, + pittsburgh_locality.location_id + } + + # Add location to endpoint + add_locations_response: MessageResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id}/locations/{pennsylvania.location_id}" + ) + # Check that location is added + get_locations_response: ProposalAgencyGetLocationsOuterResponse = rv.get_v3( + f"/proposal/agencies/{proposal_id}/locations", + expected_model=ProposalAgencyGetLocationsOuterResponse + ) + assert len(get_locations_response.results) == 3 + assert {loc.location_id for loc in 
get_locations_response.results} == { + allegheny_county.location_id, + pittsburgh_locality.location_id, + pennsylvania.location_id + } + + # Remove Location from endpoint + remove_location_response: MessageResponse = rv.delete_v3( + f"/proposal/agencies/{proposal_id}/locations/{pennsylvania.location_id}" + ) + # Check that location is removed + get_locations_response: ProposalAgencyGetLocationsOuterResponse = rv.get_v3( + f"/proposal/agencies/{proposal_id}/locations", + expected_model=ProposalAgencyGetLocationsOuterResponse + ) + assert len(get_locations_response.results) == 2 + assert {loc.location_id for loc in get_locations_response.results} == { + allegheny_county.location_id, + pittsburgh_locality.location_id, + } + # Call APPROVE endpoint approve_response: ProposalAgencyApproveResponse = rv.post_v3( f"/proposal/agencies/{proposal_id}/approve", @@ -86,9 +163,9 @@ async def test_agencies( assert len(agencies) == 1 agency = agencies[0] assert agency.id == agency_id - assert agency.name == request.name - assert agency.agency_type == request.agency_type - assert agency.jurisdiction_type == request.jurisdiction_type + assert agency.name == "Modified Agency" + assert agency.agency_type == AgencyType.AGGREGATED + assert agency.jurisdiction_type == JurisdictionType.COUNTY links: list[LinkAgencyLocation] = await adb_client.get_all(LinkAgencyLocation) assert len(links) == 2 @@ -107,7 +184,15 @@ async def test_agencies( submit_response_accepted_duplicate: SubmitAgencyProposalResponse = rv.post_v3( "/submit/agency", expected_model=SubmitAgencyProposalResponse, - json=request.model_dump(mode="json") + json=SubmitAgencyRequestModel( + name='Modified Agency', + agency_type=AgencyType.AGGREGATED, + jurisdiction_type=JurisdictionType.COUNTY, + location_ids=[ + allegheny_county.location_id, + pittsburgh_locality.location_id + ] + ).model_dump(mode="json") ) assert submit_response_accepted_duplicate.status == AgencyProposalRequestStatus.ACCEPTED_DUPLICATE assert 
submit_response_accepted_duplicate.proposal_id is None From ff9bf380affe737c6b22999d16985b84991c719b Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 30 Dec 2025 15:18:24 -0500 Subject: [PATCH 19/24] Fix attribute bugs in agency queries --- src/api/endpoints/agencies/by_id/put/query.py | 2 +- src/api/endpoints/agencies/root/get/query.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/api/endpoints/agencies/by_id/put/query.py b/src/api/endpoints/agencies/by_id/put/query.py index 942203fc..b3f3f75b 100644 --- a/src/api/endpoints/agencies/by_id/put/query.py +++ b/src/api/endpoints/agencies/by_id/put/query.py @@ -36,7 +36,7 @@ async def run(self, session: AsyncSession) -> None: if self.request.name is not None: agency.name = self.request.name if self.request.type is not None: - agency.type = self.request.type + agency.agency_type = self.request.type if self.request.jurisdiction_type is not None: agency.jurisdiction_type = self.request.jurisdiction_type diff --git a/src/api/endpoints/agencies/root/get/query.py b/src/api/endpoints/agencies/root/get/query.py index ae3b943d..12099906 100644 --- a/src/api/endpoints/agencies/root/get/query.py +++ b/src/api/endpoints/agencies/root/get/query.py @@ -26,6 +26,9 @@ async def run(self, session: AsyncSession) -> list[AgencyGetResponse]: .options( selectinload(Agency.locations) ) + .order_by( + Agency.id.asc() + ) .offset((self.page - 1) * 100) .limit(100) ) From bc1531e332f9b1cf43ec9b53e2e8298d3bfbab2b Mon Sep 17 00:00:00 2001 From: maxachis Date: Wed, 31 Dec 2025 12:27:07 -0500 Subject: [PATCH 20/24] Begin draft --- ...59ce7d0772b_remove_url_status_attribute.py | 31 +++++++ .../queries/get_annotation_batch_info.py | 1 - .../annotate/_shared/queries/helper.py | 1 - .../aggregated/query/url_error/query.py | 2 +- .../metrics/urls/breakdown/query/core.py | 3 +- .../data_source/models/response/duplicate.py | 4 +- .../submit/data_source/queries/duplicate.py | 8 +- src/api/endpoints/url/get/query.py | 88 
+++++++++++++++---- .../impl/data_sources/add/queries/get.py | 11 ++- .../impl/sync_to_ds/shared/convert.py | 11 ++- .../impl/update_url_status/operator.py | 15 ---- .../scheduled/impl/update_url_status/query.py | 49 ----------- src/core/tasks/scheduled/loader.py | 10 --- .../operators/auto_relevant/queries/cte.py | 1 - .../tasks/url/operators/html/queries/get.py | 34 ++++--- src/db/client/async_.py | 1 - .../batch_url_status}/__init__.py | 0 .../batch_url_status/enums.py | 8 ++ .../batch_url_status/sqlalchemy.py | 14 +++ .../url_status}/__init__.py | 0 .../materialized_views/url_status/enums.py | 20 +++++ .../url_status/sqlalchemy.py | 15 ++++ .../core/common/annotation_exists_/core.py | 1 - .../url_counts/builder.py | 16 +--- .../url_counts/cte/duplicate.py | 29 ------ .../core/metrics/urls/aggregated/pending.py | 4 +- src/db/statement_composer.py | 77 +++++++++------- .../api/submit/data_source/test_core.py | 1 - .../db/client/approve_url/test_basic.py | 1 - .../impl/update_url_status/test_core.py | 77 ---------------- .../tasks/url/impl/html/check/manager.py | 6 +- .../tasks/url/impl/probe/check/manager.py | 8 +- 32 files changed, 262 insertions(+), 285 deletions(-) create mode 100644 alembic/versions/2025_12_31_1106-759ce7d0772b_remove_url_status_attribute.py delete mode 100644 src/core/tasks/scheduled/impl/update_url_status/operator.py delete mode 100644 src/core/tasks/scheduled/impl/update_url_status/query.py rename src/{core/tasks/scheduled/impl/update_url_status => db/models/materialized_views/batch_url_status}/__init__.py (100%) create mode 100644 src/db/models/materialized_views/batch_url_status/enums.py create mode 100644 src/db/models/materialized_views/batch_url_status/sqlalchemy.py rename {tests/automated/integration/tasks/scheduled/impl/update_url_status => src/db/models/materialized_views/url_status}/__init__.py (100%) create mode 100644 src/db/models/materialized_views/url_status/enums.py create mode 100644 
src/db/models/materialized_views/url_status/sqlalchemy.py delete mode 100644 src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py delete mode 100644 tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py diff --git a/alembic/versions/2025_12_31_1106-759ce7d0772b_remove_url_status_attribute.py b/alembic/versions/2025_12_31_1106-759ce7d0772b_remove_url_status_attribute.py new file mode 100644 index 00000000..379c045a --- /dev/null +++ b/alembic/versions/2025_12_31_1106-759ce7d0772b_remove_url_status_attribute.py @@ -0,0 +1,31 @@ +"""Remove URL Status attribute + +Revision ID: 759ce7d0772b +Revises: 42933d84aa52 +Create Date: 2025-12-31 11:06:39.037486 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '759ce7d0772b' +down_revision: Union[str, None] = '42933d84aa52' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.drop_column( + table_name="urls", + column_name="status" + ) + + op.execute("""DROP type url_status""") + + +def downgrade() -> None: + pass diff --git a/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py b/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py index 5a56cf32..0154ca2f 100644 --- a/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py +++ b/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py @@ -42,7 +42,6 @@ async def run( ) common_where_clause = [ - URL.status == URLStatus.OK.value, LinkBatchURL.batch_id == self.batch_id, ] diff --git a/src/api/endpoints/annotate/_shared/queries/helper.py b/src/api/endpoints/annotate/_shared/queries/helper.py index 76def5c1..9d7e2210 100644 --- a/src/api/endpoints/annotate/_shared/queries/helper.py +++ b/src/api/endpoints/annotate/_shared/queries/helper.py @@ -33,7 +33,6 @@ 
def add_common_where_conditions( query: Select, ) -> Select: return query.where( - URL.status == URLStatus.OK.value, not_exists_url( FlagURLSuspended ), diff --git a/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py b/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py index a7b9e27a..23929c14 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py @@ -28,7 +28,7 @@ async def run(self, session: AsyncSession) -> list[CountByBatchStrategyResponse] .where( exists_url(URLTaskError) ) - .group_by(Batch.strategy, URL.status) + .group_by(Batch.strategy) ) mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) diff --git a/src/api/endpoints/metrics/urls/breakdown/query/core.py b/src/api/endpoints/metrics/urls/breakdown/query/core.py index c214b169..d2a1703f 100644 --- a/src/api/endpoints/metrics/urls/breakdown/query/core.py +++ b/src/api/endpoints/metrics/urls/breakdown/query/core.py @@ -63,8 +63,7 @@ async def run(self, session: AsyncSession) -> GetMetricsURLsBreakdownPendingResp FlagURLValidated.url_id == URL.id ) .where( - FlagURLValidated.url_id.is_(None), - URL.status == URLStatus.OK + FlagURLValidated.url_id.is_(None) ) .group_by(month) .order_by(month.asc()) diff --git a/src/api/endpoints/submit/data_source/models/response/duplicate.py b/src/api/endpoints/submit/data_source/models/response/duplicate.py index 12367372..7bac1b5a 100644 --- a/src/api/endpoints/submit/data_source/models/response/duplicate.py +++ b/src/api/endpoints/submit/data_source/models/response/duplicate.py @@ -1,11 +1,11 @@ from pydantic import BaseModel -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.materialized_views.url_status.enums import URLStatusEnum class SubmitDataSourceURLDuplicateSubmissionResponse(BaseModel): message: str url_id: int url_type: URLType 
| None - url_status: URLStatus \ No newline at end of file + url_status: URLStatusEnum \ No newline at end of file diff --git a/src/api/endpoints/submit/data_source/queries/duplicate.py b/src/api/endpoints/submit/data_source/queries/duplicate.py index 75346cf6..b608da6a 100644 --- a/src/api/endpoints/submit/data_source/queries/duplicate.py +++ b/src/api/endpoints/submit/data_source/queries/duplicate.py @@ -8,6 +8,7 @@ SubmitDataSourceURLDuplicateSubmissionResponse from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.queries.base.builder import QueryBuilderBase @@ -29,13 +30,16 @@ async def run(self, session: AsyncSession) -> None: query = ( select( URL.id, - URL.status, + URLStatusMaterializedView.status, FlagURLValidated.type ) .outerjoin( FlagURLValidated, FlagURLValidated.url_id == URL.id ) + .outerjoin( + URLStatusMaterializedView.status + ) .where( URL.url == self.url ) @@ -48,7 +52,7 @@ async def run(self, session: AsyncSession) -> None: model = SubmitDataSourceURLDuplicateSubmissionResponse( message="Duplicate URL found", url_id=mapping[URL.id], - url_status=mapping[URL.status], + url_status=mapping[URLStatusMaterializedView.status], url_type=mapping[FlagURLValidated.type] ) raise HTTPException( diff --git a/src/api/endpoints/url/get/query.py b/src/api/endpoints/url/get/query.py index 6885ef64..6cfbc5ce 100644 --- a/src/api/endpoints/url/get/query.py +++ b/src/api/endpoints/url/get/query.py @@ -1,12 +1,19 @@ -from sqlalchemy import select, exists +from typing import Sequence + +from sqlalchemy import select, exists, RowMapping, func +from sqlalchemy.dialects.postgresql import aggregate_order_by from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import selectinload from src.api.endpoints.url.get.dto import GetURLsResponseInfo, GetURLsResponseErrorInfo, 
GetURLsResponseInnerInfo from src.collectors.enums import URLStatus from src.db.client.helpers import add_standard_limit_and_offset +from src.db.models.impl import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.queries.base.builder import QueryBuilderBase @@ -22,6 +29,57 @@ def __init__( self.errors = errors async def run(self, session: AsyncSession) -> GetURLsResponseInfo: + + error_cte = ( + select( + URLTaskError.url_id, + func.array_agg( + aggregate_order_by( + func.jsonb_build_object( + "type", URLTaskError.task_type, + "error", URLTaskError.error, + "created_at", URLTaskError.created_at + ), + URLTaskError.created_at, + ) + ).label("error_array") + ) + .group_by( + URLTaskError.url_id + ) + .cte("errors") + ) + + + query = ( + select( + URL.id, + LinkBatchURL.batch_id, + URL.full_url, + URL.collector_metadata, + URLStatusMaterializedView.status, + URL.created_at, + URL.updated_at, + URL.name, + error_cte.c.error_array + ) + .join( + URLWebMetadata + ) + .outerjoin( + LinkBatchURL + ) + .join( + URLStatusMaterializedView + ) + .outerjoin( + error_cte, + error_cte.c.url_id == URL.id + ) + .outerjoin( + URLScrapeInfo + ) + ) statement = select(URL).options( selectinload(URL.task_errors), selectinload(URL.batch) @@ -34,27 +92,27 @@ async def run(self, session: AsyncSession) -> GetURLsResponseInfo: ) ) add_standard_limit_and_offset(statement, self.page) - execute_result = await session.execute(statement) - all_results = execute_result.scalars().all() + mappings: Sequence[RowMapping] = await self.sh.mappings(session, query) + final_results = [] - for result in all_results: + for mapping in mappings: error_results = [] - for 
error in result.task_errors: + for error in mapping["error_array"]: error_result = GetURLsResponseErrorInfo( - task=error.task_type, - error=error.error, - updated_at=error.created_at + task=error["task_type"], + error=error["error"], + updated_at=error["created_at"] ) error_results.append(error_result) final_results.append( GetURLsResponseInnerInfo( - id=result.id, - batch_id=result.batch.id if result.batch is not None else None, - url=result.full_url, - status=URLStatus(result.status), - collector_metadata=result.collector_metadata, - updated_at=result.updated_at, - created_at=result.created_at, + id=mapping[URL.id], + batch_id=mapping[LinkBatchURL.batch_id], + url=mapping[URL.full_url], + collector_metadata=mapping[URL.collector_metadata], + status=mapping[URLStatusMaterializedView.status], + created_at=mapping[URL.created_at], + updated_at=mapping[URL.updated_at], errors=error_results, ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py index 04710ba6..960f36ad 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py @@ -12,6 +12,8 @@ from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel @@ -40,7 +42,7 @@ async def run(self, session: 
AsyncSession) -> AddDataSourcesOuterRequest: # Required URL.full_url, URL.name, - URL.status, + URLWebMetadata.status_code, URLRecordType.record_type, agency_id_cte.c.agency_ids, # Optional @@ -72,6 +74,10 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: URLOptionalDataSourceMetadata, URL.id == URLOptionalDataSourceMetadata.url_id, ) + .outerjoin( + URLWebMetadata, + URL.id == URLWebMetadata.url_id + ) .outerjoin( URLInternetArchivesProbeMetadata, URL.id == URLInternetArchivesProbeMetadata.url_id, @@ -118,8 +124,9 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url], access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], + # TODO: Change to convert web metadata result to URL Status url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URL.status], + sm_url_status=mapping[URLWebMetadata.status_code], ), internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py index 3f586b20..fb425e0e 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py @@ -1,14 +1,13 @@ from src.collectors.enums import URLStatus +from src.db.models.materialized_views.url_status.enums import URLStatusEnum from src.external.pdap.enums import DataSourcesURLStatus def convert_sm_url_status_to_ds_url_status( - sm_url_status: URLStatus + status_code: int ) -> DataSourcesURLStatus: - match sm_url_status: - case URLStatus.OK: + match status_code: + case 200: return DataSourcesURLStatus.OK - case URLStatus.BROKEN: - return DataSourcesURLStatus.BROKEN case _: - raise ValueError(f"URL status has no corresponding DS Status: {sm_url_status}") \ No newline 
at end of file + return DataSourcesURLStatus.BROKEN \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/update_url_status/operator.py b/src/core/tasks/scheduled/impl/update_url_status/operator.py deleted file mode 100644 index 82285996..00000000 --- a/src/core/tasks/scheduled/impl/update_url_status/operator.py +++ /dev/null @@ -1,15 +0,0 @@ -from src.core.tasks.scheduled.impl.update_url_status.query import UpdateURLStatusQueryBuilder -from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase -from src.db.enums import TaskType - - -class UpdateURLStatusOperator(ScheduledTaskOperatorBase): - - @property - def task_type(self) -> TaskType: - return TaskType.UPDATE_URL_STATUS - - async def inner_task_logic(self) -> None: - await self.adb_client.run_query_builder( - UpdateURLStatusQueryBuilder() - ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/update_url_status/query.py b/src/core/tasks/scheduled/impl/update_url_status/query.py deleted file mode 100644 index 963405b6..00000000 --- a/src/core/tasks/scheduled/impl/update_url_status/query.py +++ /dev/null @@ -1,49 +0,0 @@ -from sqlalchemy import update, exists, select -from sqlalchemy.ext.asyncio import AsyncSession - -from src.collectors.enums import URLStatus -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from src.db.queries.base.builder import QueryBuilderBase - - -class UpdateURLStatusQueryBuilder(QueryBuilderBase): - - async def run(self, session: AsyncSession) -> None: - - # Update broken URLs to nonbroken if their status is not 404 - query_broken_to_ok = ( - update(URL) - .values( - status=URLStatus.OK - ) - .where( - exists( - select(1).where( - URLWebMetadata.url_id == URL.id, # <-- correlate - URLWebMetadata.status_code != 404, - URL.status == URLStatus.BROKEN - ) - ) - ) - ) - - # Update ok URLs to broken if their status is 404 - query_ok_to_broken = ( - 
update(URL) - .values( - status=URLStatus.BROKEN - ) - .where( - exists( - select(1).where( - URLWebMetadata.url_id == URL.id, # <-- correlate - URLWebMetadata.status_code == 404, - URL.status == URLStatus.OK - ) - ) - ) - ) - - await session.execute(query_broken_to_ok) - await session.execute(query_ok_to_broken) \ No newline at end of file diff --git a/src/core/tasks/scheduled/loader.py b/src/core/tasks/scheduled/loader.py index d2e96cc1..38ebced3 100644 --- a/src/core/tasks/scheduled/loader.py +++ b/src/core/tasks/scheduled/loader.py @@ -25,7 +25,6 @@ from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.core import DSAppSyncMetaURLsDeleteTaskOperator from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator from src.core.tasks.scheduled.impl.task_cleanup.operator import TaskCleanupOperator -from src.core.tasks.scheduled.impl.update_url_status.operator import UpdateURLStatusOperator from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.hub.client import HuggingFaceHubClient @@ -230,13 +229,4 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: interval_minutes=IntervalEnum.HOURLY.value, enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG") ), - ### URL - ScheduledTaskEntry( - operator=UpdateURLStatusOperator( - adb_client=self.adb_client - ), - interval_minutes=IntervalEnum.DAILY.value, - enabled=self.setup_flag("UPDATE_URL_STATUS_TASK_FLAG") - ), - ] diff --git a/src/core/tasks/url/operators/auto_relevant/queries/cte.py b/src/core/tasks/url/operators/auto_relevant/queries/cte.py index 354e4bd5..ab90db75 100644 --- a/src/core/tasks/url/operators/auto_relevant/queries/cte.py +++ b/src/core/tasks/url/operators/auto_relevant/queries/cte.py @@ -21,7 +21,6 @@ def __init__(self): URL.id == URLCompressedHTML.url_id ) .where( - URL.status == URLStatus.OK.value, 
not_exists_url(AnnotationAutoURLType), no_url_task_error(TaskType.RELEVANCY) ).cte("auto_relevant_prerequisites") diff --git a/src/core/tasks/url/operators/html/queries/get.py b/src/core/tasks/url/operators/html/queries/get.py index a6cbe4a8..2a2b39d7 100644 --- a/src/core/tasks/url/operators/html/queries/get.py +++ b/src/core/tasks/url/operators/html/queries/get.py @@ -1,7 +1,10 @@ +from sqlalchemy import RowMapping, Sequence from sqlalchemy.ext.asyncio import AsyncSession +from src.db.models.impl import LinkBatchURL from src.db.models.impl.url.core.pydantic.info import URLInfo from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.queries.base.builder import QueryBuilderBase from src.db.statement_composer import StatementComposer @@ -9,22 +12,25 @@ class GetPendingURLsWithoutHTMLDataQueryBuilder(QueryBuilderBase): async def run(self, session: AsyncSession) -> list[URLInfo]: - statement = StatementComposer.has_non_errored_urls_without_html_data() - statement = statement.limit(100).order_by(URL.id) - scalar_result = await session.scalars(statement) - url_results: list[URL] = scalar_result.all() + query = ( + StatementComposer.has_non_errored_urls_without_html_data() + .limit(100) + .order_by(URL.id) + ) - final_results = [] - for url in url_results: + mappings: Sequence[RowMapping] = await self.sh.mappings(session, query) + + final_results: list[URLInfo] = [] + for mapping in mappings: url_info = URLInfo( - id=url.id, - batch_id=url.batch.id if url.batch is not None else None, - url=url.full_url, - collector_metadata=url.collector_metadata, - status=url.status, - created_at=url.created_at, - updated_at=url.updated_at, - name=url.name + id=mapping[URL.id], + batch_id=mapping[LinkBatchURL.batch_id], + url=mapping[URL.full_url], + collector_metadata=mapping[URL.collector_metadata], + status=mapping[URLStatusMaterializedView.status], + 
created_at=mapping[URL.created_at], + updated_at=mapping[URL.updated_at], + name=mapping[URL.name] ) final_results.append(url_info) diff --git a/src/db/client/async_.py b/src/db/client/async_.py index e30c13bf..f988413f 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -831,7 +831,6 @@ async def populate_backlog_snapshot( ) .outerjoin(FlagURLValidated, URL.id == FlagURLValidated.url_id) .where( - URL.status == URLStatus.OK.value, FlagURLValidated.url_id.is_(None), ) ) diff --git a/src/core/tasks/scheduled/impl/update_url_status/__init__.py b/src/db/models/materialized_views/batch_url_status/__init__.py similarity index 100% rename from src/core/tasks/scheduled/impl/update_url_status/__init__.py rename to src/db/models/materialized_views/batch_url_status/__init__.py diff --git a/src/db/models/materialized_views/batch_url_status/enums.py b/src/db/models/materialized_views/batch_url_status/enums.py new file mode 100644 index 00000000..033d2c14 --- /dev/null +++ b/src/db/models/materialized_views/batch_url_status/enums.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class BatchURLStatusEnum(Enum): + ERROR = "Error" + UNLABELED_URLS = "Has Unlabeled URLs" + NO_URLS = "No URLs" + LABELING_COMPLETE = "Labeling Complete" \ No newline at end of file diff --git a/src/db/models/materialized_views/batch_url_status/sqlalchemy.py b/src/db/models/materialized_views/batch_url_status/sqlalchemy.py new file mode 100644 index 00000000..7c93cfc6 --- /dev/null +++ b/src/db/models/materialized_views/batch_url_status/sqlalchemy.py @@ -0,0 +1,14 @@ +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import ViewMixin, BatchDependentMixin +from src.db.models.templates_.base import Base + + +class BatchURLStatusMaterializedView( + Base, + ViewMixin, + BatchDependentMixin +): + + __tablename__ = "batch_url_status_mat_view" + batch_url_status: Mapped[str] \ No newline at end of file diff --git 
a/tests/automated/integration/tasks/scheduled/impl/update_url_status/__init__.py b/src/db/models/materialized_views/url_status/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/impl/update_url_status/__init__.py rename to src/db/models/materialized_views/url_status/__init__.py diff --git a/src/db/models/materialized_views/url_status/enums.py b/src/db/models/materialized_views/url_status/enums.py new file mode 100644 index 00000000..ebb0c2a1 --- /dev/null +++ b/src/db/models/materialized_views/url_status/enums.py @@ -0,0 +1,20 @@ +""" + +ASE status_text.status + WHEN 'Intake'::text THEN 100 + WHEN 'Error'::text THEN 110 + WHEN 'Community Labeling'::text THEN 200 + WHEN 'Accepted'::text THEN 300 + WHEN 'Awaiting Submission'::text THEN 380 + WHEN 'Submitted'::text THEN 390 +""" +from sqlalchemy import Enum + + +class URLStatusEnum(Enum): + INTAKE = "Intake" + ERROR = "Error" + COMMUNITY_LABELING = "Community Labeling" + ACCEPTED = "Accepted" + AWAITING_SUBMISSION = "Awaiting Submission" + SUBMITTED = "Submitted" \ No newline at end of file diff --git a/src/db/models/materialized_views/url_status/sqlalchemy.py b/src/db/models/materialized_views/url_status/sqlalchemy.py new file mode 100644 index 00000000..fe6c2466 --- /dev/null +++ b/src/db/models/materialized_views/url_status/sqlalchemy.py @@ -0,0 +1,15 @@ +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import URLDependentViewMixin +from src.db.models.templates_.base import Base + + +class URLStatusMaterializedView( + Base, + URLDependentViewMixin +): + + __tablename__ = "url_status_mat_view" + + status: Mapped[str] + code: Mapped[int] \ No newline at end of file diff --git a/src/db/queries/implementations/core/common/annotation_exists_/core.py b/src/db/queries/implementations/core/common/annotation_exists_/core.py index 53e8bcf6..072c04af 100644 --- a/src/db/queries/implementations/core/common/annotation_exists_/core.py +++ 
b/src/db/queries/implementations/core/common/annotation_exists_/core.py @@ -73,7 +73,6 @@ async def build(self) -> Any: FlagURLValidated.url_id == URL.id ) anno_exists_query = anno_exists_query.where( - URL.status == URLStatus.OK.value, FlagURLValidated.url_id.is_(None) ) anno_exists_query = anno_exists_query.group_by(URL.id).cte("annotations_exist") diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py index 27240b7d..9eb9ef4c 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py @@ -1,20 +1,13 @@ -from sqlalchemy import Select, case, Label, and_, exists -from sqlalchemy.sql.functions import count, coalesce, func +from sqlalchemy import Select +from sqlalchemy.sql.functions import func -from src.collectors.enums import URLStatus, CollectorType -from src.core.enums import BatchStatus -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL +from src.collectors.enums import CollectorType from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.models.views.batch_url_status.core import BatchURLStatusMatView from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.helpers import add_page_offset from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.all import ALL_CTE -from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.duplicate import DUPLICATE_CTE from 
src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.error import ERROR_CTE from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.not_relevant import NOT_RELEVANT_CTE from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.pending import PENDING_CTE @@ -43,7 +36,6 @@ def get_core_query(self): query = ( Select( Batch.id.label(labels.batch_id), - func.coalesce(DUPLICATE_CTE.count, 0).label(labels.duplicate), func.coalesce(SUBMITTED_CTE.count, 0).label(labels.submitted), func.coalesce(PENDING_CTE.count, 0).label(labels.pending), func.coalesce(ALL_CTE.count, 0).label(labels.total), @@ -56,7 +48,7 @@ def get_core_query(self): BatchURLStatusMatView.batch_id == Batch.id, ) ) - for cte in [DUPLICATE_CTE, SUBMITTED_CTE, PENDING_CTE, ALL_CTE, NOT_RELEVANT_CTE, ERROR_CTE]: + for cte in [SUBMITTED_CTE, PENDING_CTE, ALL_CTE, NOT_RELEVANT_CTE, ERROR_CTE]: query = query.outerjoin( cte.cte, Batch.id == cte.batch_id diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py deleted file mode 100644 index 906dd49c..00000000 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py +++ /dev/null @@ -1,29 +0,0 @@ -from sqlalchemy import select, func - -from src.collectors.enums import URLStatus -from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ - URLCountsCTEContainer - -DUPLICATE_CTE = URLCountsCTEContainer( - select( - Batch.id, - func.count(URL.id).label("duplicate_count") - ) - .join( - LinkBatchURL, - LinkBatchURL.batch_id == Batch.id, - ) - .join( - URL, - URL.id == LinkBatchURL.url_id, - ) - 
.where( - URL.status == URLStatus.DUPLICATE - ) - .group_by( - Batch.id - ).cte("duplicate_count") -) \ No newline at end of file diff --git a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py index c224fa40..30aba066 100644 --- a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py +++ b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py @@ -43,9 +43,7 @@ async def build(self) -> Any: URL, URL.id == self.url_id ) - .where( - URL.status == URLStatus.OK.value - ).cte("pending") + .cte("pending") ) diff --git a/src/db/statement_composer.py b/src/db/statement_composer.py index faa965a8..512062a6 100644 --- a/src/db/statement_composer.py +++ b/src/db/statement_composer.py @@ -1,7 +1,7 @@ from http import HTTPStatus from typing import Any -from sqlalchemy import Select, select, exists, func, Subquery, and_, not_, ColumnElement +from sqlalchemy import Select, select, exists, func, Subquery, and_, not_, ColumnElement, Exists from sqlalchemy.orm import selectinload from src.collectors.enums import URLStatus @@ -15,6 +15,7 @@ from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.types import UserSuggestionType @@ -23,46 +24,57 @@ class StatementComposer: Assists in the composition of SQLAlchemy statements """ + @staticmethod + def exclude_completed_html_task_subquery() -> ColumnElement[bool]: + return ~exists( + select(1) + .select_from( + LinkTaskURL + ) + .join( + Task, + LinkTaskURL.task_id == Task.id + ) + .where( + LinkTaskURL.url_id == URL.id, + Task.task_type == TaskType.HTML.value, + Task.task_status == TaskStatus.COMPLETE.value + ) + ) + 
@staticmethod def has_non_errored_urls_without_html_data() -> Select: - exclude_subquery = ( - select(1). - select_from(LinkTaskURL). - join(Task, LinkTaskURL.task_id == Task.id). - where(LinkTaskURL.url_id == URL.id). - where(Task.task_type == TaskType.HTML.value). - where(Task.task_status == TaskStatus.COMPLETE.value) - ) query = ( - select(URL) - .join(URLWebMetadata) - .outerjoin(URLScrapeInfo) + select( + URL.id, + LinkBatchURL.batch_id, + URL.full_url, + URL.collector_metadata, + URLStatusMaterializedView.status, + URL.created_at, + URL.updated_at, + URL.name + ) + .join( + URLWebMetadata + ) + .outerjoin( + LinkBatchURL + ) + .join( + URLStatusMaterializedView + ) + .outerjoin( + URLScrapeInfo + ) .where( URLScrapeInfo.url_id == None, - ~exists(exclude_subquery), - URLWebMetadata.status_code == HTTPStatus.OK.value, + StatementComposer.exclude_completed_html_task_subquery, URLWebMetadata.content_type.like("%html%"), ) - .options( - selectinload(URL.batch) - ) ) return query - @staticmethod - def exclude_urls_with_extant_model( - statement: Select, - model: Any - ): - return (statement.where( - ~exists( - select(model.id). 
- where( - model.url_id == URL.id - ) - ) - )) - @staticmethod def simple_count_subquery(model, attribute: str, label: str) -> Subquery: attr_value = getattr(model, attribute) @@ -74,12 +86,9 @@ def simple_count_subquery(model, attribute: str, label: str) -> Subquery: @staticmethod def pending_urls_missing_miscellaneous_metadata_query() -> Select: query = select(URL).where( - and_( - URL.status == URLStatus.OK.value, URL.name == None, URL.description == None, URLOptionalDataSourceMetadata.url_id == None - ) ).outerjoin( URLOptionalDataSourceMetadata ).join( diff --git a/tests/automated/integration/api/submit/data_source/test_core.py b/tests/automated/integration/api/submit/data_source/test_core.py index 51bbc93b..120abd29 100644 --- a/tests/automated/integration/api/submit/data_source/test_core.py +++ b/tests/automated/integration/api/submit/data_source/test_core.py @@ -78,7 +78,6 @@ async def test_submit_data_source( assert url.scheme == "https" assert url.trailing_slash == True assert url.source == URLSource.MANUAL - assert url.status == URLStatus.OK assert url.description == "Example description" # Check for Batch diff --git a/tests/automated/integration/db/client/approve_url/test_basic.py b/tests/automated/integration/db/client/approve_url/test_basic.py index 9421c1b7..734ff9b5 100644 --- a/tests/automated/integration/db/client/approve_url/test_basic.py +++ b/tests/automated/integration/db/client/approve_url/test_basic.py @@ -42,7 +42,6 @@ async def test_approve_url_basic(db_data_creator: DBDataCreator): assert len(urls) == 1 url = urls[0] assert url.id == url_mapping.url_id - assert url.status == URLStatus.OK assert url.name == "Test Name" assert url.description == "Test Description" diff --git a/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py b/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py deleted file mode 100644 index 6b06fe31..00000000 --- 
a/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py +++ /dev/null @@ -1,77 +0,0 @@ -import pytest -from sqlalchemy import update - -from src.collectors.enums import URLStatus -from src.core.tasks.scheduled.impl.update_url_status.operator import UpdateURLStatusOperator -from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_update_url_status_task( - test_url_data_source_id: int, - test_url_meta_url_id: int, - adb_client_test: AsyncDatabaseClient, - db_data_creator: DBDataCreator -): - - # Create Operator - operator = UpdateURLStatusOperator( - adb_client=adb_client_test, - ) - - # Add web metadata to URLs - ## Data Source URL: Add 404 - await db_data_creator.create_web_metadata( - url_ids=[test_url_data_source_id], - status_code=404 - ) - - ## Meta URL: Add 200 - await db_data_creator.create_web_metadata( - url_ids=[test_url_meta_url_id], - status_code=200 - ) - - # Run Task - await operator.run_task() - - # Check URLs - urls: list[URL] = await adb_client_test.get_all(URL) - id_status_set_tuple: set[tuple[int, URLStatus]] = { - (url.id, url.status) - for url in urls - } - ## Data Source URL: Status should now be broken - ## Meta URL: Status should be unchanged - assert id_status_set_tuple == { - (test_url_data_source_id, URLStatus.BROKEN), - (test_url_meta_url_id, URLStatus.OK) - } - - # Update Web Metadata for Data Source URL to be 404 - statement = update(URLWebMetadata).where( - URLWebMetadata.url_id == test_url_data_source_id, - ).values( - status_code=200 - ) - await adb_client_test.execute(statement) - - # Run Task - await operator.run_task() - - # Check URLs - urls: list[URL] = await adb_client_test.get_all(URL) - id_status_set_tuple: set[tuple[int, URLStatus]] = { - (url.id, url.status) - for url in 
urls - } - ## Data Source URL: Status should now be ok - ## Meta URL: Status should be unchanged - assert id_status_set_tuple == { - (test_url_data_source_id, URLStatus.OK), - (test_url_meta_url_id, URLStatus.OK) - } - diff --git a/tests/automated/integration/tasks/url/impl/html/check/manager.py b/tests/automated/integration/tasks/url/impl/html/check/manager.py index deb0fa11..56c721fa 100644 --- a/tests/automated/integration/tasks/url/impl/html/check/manager.py +++ b/tests/automated/integration/tasks/url/impl/html/check/manager.py @@ -3,6 +3,8 @@ from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry from tests.automated.integration.tasks.url.impl.html.setup.models.record import TestURLHTMLTaskSetupRecord @@ -51,9 +53,9 @@ async def _check_scrape_status(self) -> None: assert url_scrape_info.status == expected_scrape_status async def _check_has_same_url_status(self): - urls: list[URL] = await self.adb_client.get_all(URL) + urls: list[URLStatusMaterializedView] = await self.adb_client.get_all(URLStatusMaterializedView) for url in urls: - entry = self._id_to_entry[url.id] + entry: TestURLHTMLTaskSetupEntry = self._id_to_entry[url.id] if entry.expected_result.web_metadata_status_marked_404: continue assert url.status == entry.url_info.status, f"URL {url.url} has outcome {url.status} instead of {entry.url_info.status}" diff --git a/tests/automated/integration/tasks/url/impl/probe/check/manager.py b/tests/automated/integration/tasks/url/impl/probe/check/manager.py index 200f428a..40111201 100644 --- a/tests/automated/integration/tasks/url/impl/probe/check/manager.py +++ 
b/tests/automated/integration/tasks/url/impl/probe/check/manager.py @@ -5,6 +5,8 @@ from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_status.enums import URLStatusEnum +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView class TestURLProbeCheckManager: @@ -18,10 +20,10 @@ def __init__( async def check_url( self, url_id: int, - expected_status: URLStatus + expected_status: URLStatusEnum ): - url: URL = await self.adb_client.one_or_none( - statement=select(URL).where(URL.id == url_id) + url: URLStatusMaterializedView = await self.adb_client.one_or_none( + statement=select(URLStatusMaterializedView).where(URLStatusMaterializedView.id == url_id) ) assert url is not None assert url.status == expected_status From d3f8041ded10340cd6789cc7255944aa67d521c1 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Wed, 31 Dec 2025 18:59:59 -0500 Subject: [PATCH 21/24] Continue draft --- .../batch/dtos/get/summaries/counts.py | 1 - src/api/endpoints/batch/routes.py | 4 +- src/api/endpoints/collector/manual/query.py | 1 - .../metrics/dtos/get/urls/aggregated/core.py | 2 +- .../metrics/urls/aggregated/query/core.py | 2 +- .../query/subqueries/oldest_pending_url.py | 11 +-- .../aggregated/query/subqueries/status.py | 10 +- .../data_source/models/response/duplicate.py | 4 +- .../submit/data_source/queries/core.py | 1 - .../submit/data_source/queries/duplicate.py | 3 +- src/api/endpoints/submit/url/queries/core.py | 1 - src/api/endpoints/task/by_id/query.py | 2 +- src/api/endpoints/url/get/dto.py | 3 +- src/api/endpoints/url/get/query.py | 26 ++--- src/collectors/queries/insert/url.py | 1 - src/core/core.py | 4 +- .../impl/data_sources/add/queries/get.py | 2 +- .../impl/data_sources/update/queries/get.py | 9 +- .../impl/meta_urls/add/queries/get.py | 
9 +- .../impl/meta_urls/update/queries/get.py | 9 +- .../impl/sync_to_ds/shared/convert.py | 2 - src/core/tasks/url/operators/html/core.py | 5 +- .../operators/html/queries/get}/__init__.py | 0 .../html/queries/{get.py => get/query.py} | 11 +-- .../url/operators/html/queries/helpers.py | 51 ++++++++++ .../operators/html/queries/prerequisites.py | 13 +++ src/db/client/async_.py | 16 +--- src/db/client/sync.py | 1 - .../models/impl/url/core/pydantic/insert.py | 1 - src/db/models/impl/url/core/sqlalchemy.py | 5 - .../batch_url_status/core.py | 2 +- .../batch_url_status/enums.py | 6 +- .../batch_url_status/sqlalchemy.py | 14 --- .../materialized_views/url_status/enums.py | 20 +--- src/db/models/views/batch_url_status/enums.py | 8 -- src/db/models/views/url_status/__init__.py | 0 src/db/models/views/url_status/core.py | 72 -------------- src/db/models/views/url_status/enums.py | 10 -- .../get/recent_batch_summaries/builder.py | 14 ++- .../url_counts/builder.py | 12 +-- .../url_counts/labels.py | 1 - src/db/statement_composer.py | 51 ---------- .../api/_helpers/RequestValidator.py | 4 +- .../api/batch/summaries/test_happy_path.py | 3 - .../summaries/test_pending_url_filter.py | 4 +- .../api/metrics/batches/test_aggregated.py | 2 - .../api/metrics/urls/aggregated/test_core.py | 2 +- .../api/submit/data_source/test_duplicate.py | 4 +- .../automated/integration/api/url/test_get.py | 1 + tests/automated/integration/conftest.py | 1 - .../db/structure/test_updated_at.py | 1 - .../integration/readonly/setup/data_source.py | 2 - .../integration/readonly/setup/meta_url.py | 1 - .../impl/huggingface/setup/queries/setup.py | 1 - .../impl/sync_to_ds/data_source/test_add.py | 1 - .../impl/sync_to_ds/meta_url/test_add.py | 6 +- .../tasks/scheduled/loader/test_happy_path.py | 2 +- .../tasks/url/impl/html/check/__init__.py | 0 .../tasks/url/impl/html/check/manager.py | 70 -------------- .../tasks/url/impl/html/conftest.py | 28 ++++++ .../tasks/url/impl/html/mocks/methods.py | 3 - 
.../mocks/url_request_interface/__init__.py | 0 .../html/mocks/url_request_interface/core.py | 11 --- .../html/mocks/url_request_interface/setup.py | 57 ----------- .../tasks/url/impl/html/setup/__init__.py | 0 .../tasks/url/impl/html/setup/data.py | 94 ------------------- .../tasks/url/impl/html/setup/manager.py | 79 ---------------- .../url/impl/html/setup/models/__init__.py | 0 .../tasks/url/impl/html/setup/models/entry.py | 34 ------- .../url/impl/html/setup/models/record.py | 8 -- .../tasks/url/impl/html/test_200.py | 81 ++++++++++++++++ .../tasks/url/impl/html/test_404.py | 65 +++++++++++++ .../tasks/url/impl/html/test_error.py | 62 ++++++++++++ .../url/impl/html/test_no_web_metadata.py | 27 ++++++ .../tasks/url/impl/html/test_non_200.py | 32 +++++++ .../tasks/url/impl/html/test_task.py | 33 ------- .../tasks/url/impl/probe/check/manager.py | 10 +- .../url/impl/probe/no_redirect/test_error.py | 9 +- .../impl/probe/no_redirect/test_not_found.py | 8 +- .../url/impl/probe/no_redirect/test_ok.py | 7 +- .../impl/probe/no_redirect/test_two_urls.py | 4 +- .../probe/redirect/dest_new/test_dest_ok.py | 12 +-- .../probe/redirect/test_dest_exists_in_db.py | 12 +-- .../redirect/test_functional_equivalent.py | 2 +- .../probe/redirect/test_two_urls_same_dest.py | 13 +-- .../tasks/url/impl/probe/setup/manager.py | 2 - .../data_creator/commands/impl/urls_/query.py | 1 - tests/helpers/data_creator/core.py | 2 - tests/helpers/data_creator/create.py | 2 - tests/helpers/data_creator/generate.py | 2 - 90 files changed, 484 insertions(+), 741 deletions(-) rename src/{db/models/views/batch_url_status => core/tasks/url/operators/html/queries/get}/__init__.py (100%) rename src/core/tasks/url/operators/html/queries/{get.py => get/query.py} (68%) create mode 100644 src/core/tasks/url/operators/html/queries/helpers.py create mode 100644 src/core/tasks/url/operators/html/queries/prerequisites.py rename src/db/models/{views => materialized_views}/batch_url_status/core.py (98%) delete mode 
100644 src/db/models/materialized_views/batch_url_status/sqlalchemy.py delete mode 100644 src/db/models/views/batch_url_status/enums.py delete mode 100644 src/db/models/views/url_status/__init__.py delete mode 100644 src/db/models/views/url_status/core.py delete mode 100644 src/db/models/views/url_status/enums.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/check/__init__.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/check/manager.py create mode 100644 tests/automated/integration/tasks/url/impl/html/conftest.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/__init__.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/setup/__init__.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/setup/data.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/setup/manager.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/setup/models/__init__.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/setup/models/entry.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/setup/models/record.py create mode 100644 tests/automated/integration/tasks/url/impl/html/test_200.py create mode 100644 tests/automated/integration/tasks/url/impl/html/test_404.py create mode 100644 tests/automated/integration/tasks/url/impl/html/test_error.py create mode 100644 tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py create mode 100644 tests/automated/integration/tasks/url/impl/html/test_non_200.py delete mode 100644 tests/automated/integration/tasks/url/impl/html/test_task.py diff --git a/src/api/endpoints/batch/dtos/get/summaries/counts.py 
b/src/api/endpoints/batch/dtos/get/summaries/counts.py index 0ce4e468..0faaa20b 100644 --- a/src/api/endpoints/batch/dtos/get/summaries/counts.py +++ b/src/api/endpoints/batch/dtos/get/summaries/counts.py @@ -4,7 +4,6 @@ class BatchSummaryURLCounts(BaseModel): total: int pending: int - duplicate: int not_relevant: int submitted: int errored: int diff --git a/src/api/endpoints/batch/routes.py b/src/api/endpoints/batch/routes.py index 87839fb7..4dfbbbfc 100644 --- a/src/api/endpoints/batch/routes.py +++ b/src/api/endpoints/batch/routes.py @@ -10,7 +10,7 @@ from src.api.endpoints.batch.urls.dto import GetURLsByBatchResponse from src.collectors.enums import CollectorType from src.core.core import AsyncCore -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.security.dtos.access_info import AccessInfo from src.security.manager import get_access_info @@ -27,7 +27,7 @@ async def get_batch_status( description="Filter by collector type", default=None ), - status: BatchURLStatusEnum | None = Query( + status: BatchURLStatusViewEnum | None = Query( description="Filter by status", default=None ), diff --git a/src/api/endpoints/collector/manual/query.py b/src/api/endpoints/collector/manual/query.py index 5ebe0e4b..31cd91ad 100644 --- a/src/api/endpoints/collector/manual/query.py +++ b/src/api/endpoints/collector/manual/query.py @@ -53,7 +53,6 @@ async def run(self, session: AsyncSession) -> ManualBatchResponseDTO: name=entry.name, description=entry.description, collector_metadata=entry.collector_metadata, - status=URLStatus.OK.value, source=URLSource.MANUAL, trailing_slash=url_and_scheme.url.endswith('/'), ) diff --git a/src/api/endpoints/metrics/dtos/get/urls/aggregated/core.py b/src/api/endpoints/metrics/dtos/get/urls/aggregated/core.py index 7dbbc48a..1c8ba860 100644 --- a/src/api/endpoints/metrics/dtos/get/urls/aggregated/core.py +++ 
b/src/api/endpoints/metrics/dtos/get/urls/aggregated/core.py @@ -4,7 +4,7 @@ from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum class GetMetricsURLValidatedOldestPendingURL(BaseModel): url_id: int diff --git a/src/api/endpoints/metrics/urls/aggregated/query/core.py b/src/api/endpoints/metrics/urls/aggregated/query/core.py index c6dbc29f..880c7e3b 100644 --- a/src/api/endpoints/metrics/urls/aggregated/query/core.py +++ b/src/api/endpoints/metrics/urls/aggregated/query/core.py @@ -11,7 +11,7 @@ from src.core.enums import RecordType from src.db.helpers.session import session_helper as sh from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/oldest_pending_url.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/oldest_pending_url.py index e086b752..f8a8f571 100644 --- a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/oldest_pending_url.py +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/oldest_pending_url.py @@ -3,11 +3,10 @@ from src.api.endpoints.metrics.dtos.get.urls.aggregated.core import GetMetricsURLValidatedOldestPendingURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.views.url_status.core import URLStatusMatView -from src.db.models.views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase -from 
src.db.helpers.session import session_helper as sh class GetOldestPendingURLQueryBuilder(QueryBuilderBase): @@ -18,14 +17,14 @@ async def run( query = ( select( - URLStatusMatView.url_id, + URLStatusMaterializedView.url_id, URL.created_at ) .join( URL, - URLStatusMatView.url_id == URL.id + URLStatusMaterializedView.url_id == URL.id ).where( - URLStatusMatView.status.not_in( + URLStatusMaterializedView.status.not_in( [ URLStatusViewEnum.SUBMITTED.value, URLStatusViewEnum.ACCEPTED.value, diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/status.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/status.py index 05813ce0..6f369b32 100644 --- a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/status.py +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/status.py @@ -4,8 +4,8 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.db.helpers.session import session_helper as sh -from src.db.models.views.url_status.core import URLStatusMatView -from src.db.models.views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase @@ -18,13 +18,13 @@ async def run( query = ( select( - URLStatusMatView.status, + URLStatusMaterializedView.status, func.count( - URLStatusMatView.url_id + URLStatusMaterializedView.url_id ).label("count") ) .group_by( - URLStatusMatView.status + URLStatusMaterializedView.status ) ) diff --git a/src/api/endpoints/submit/data_source/models/response/duplicate.py b/src/api/endpoints/submit/data_source/models/response/duplicate.py index 7bac1b5a..f1414b8f 100644 --- a/src/api/endpoints/submit/data_source/models/response/duplicate.py +++ b/src/api/endpoints/submit/data_source/models/response/duplicate.py @@ -1,11 +1,11 @@ from pydantic import BaseModel from 
src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.materialized_views.url_status.enums import URLStatusEnum +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum class SubmitDataSourceURLDuplicateSubmissionResponse(BaseModel): message: str url_id: int url_type: URLType | None - url_status: URLStatusEnum \ No newline at end of file + url_status: URLStatusViewEnum \ No newline at end of file diff --git a/src/api/endpoints/submit/data_source/queries/core.py b/src/api/endpoints/submit/data_source/queries/core.py index 77c33dca..f4329786 100644 --- a/src/api/endpoints/submit/data_source/queries/core.py +++ b/src/api/endpoints/submit/data_source/queries/core.py @@ -44,7 +44,6 @@ async def run( trailing_slash=full_url.has_trailing_slash, name=self.request.name, description=self.request.description, - status=URLStatus.OK, source=URLSource.MANUAL, ) diff --git a/src/api/endpoints/submit/data_source/queries/duplicate.py b/src/api/endpoints/submit/data_source/queries/duplicate.py index b608da6a..d4409e91 100644 --- a/src/api/endpoints/submit/data_source/queries/duplicate.py +++ b/src/api/endpoints/submit/data_source/queries/duplicate.py @@ -38,7 +38,8 @@ async def run(self, session: AsyncSession) -> None: FlagURLValidated.url_id == URL.id ) .outerjoin( - URLStatusMaterializedView.status + URLStatusMaterializedView, + URLStatusMaterializedView.url_id == URL.id ) .where( URL.url == self.url diff --git a/src/api/endpoints/submit/url/queries/core.py b/src/api/endpoints/submit/url/queries/core.py index 54ab5439..8e257072 100644 --- a/src/api/endpoints/submit/url/queries/core.py +++ b/src/api/endpoints/submit/url/queries/core.py @@ -61,7 +61,6 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: url=url_and_scheme.url, scheme=url_and_scheme.scheme, source=URLSource.MANUAL, - status=URLStatus.OK, description=self.request.description, trailing_slash=url_and_scheme.url.endswith('/'), ) diff --git 
a/src/api/endpoints/task/by_id/query.py b/src/api/endpoints/task/by_id/query.py index 92487327..6aa55fd0 100644 --- a/src/api/endpoints/task/by_id/query.py +++ b/src/api/endpoints/task/by_id/query.py @@ -35,6 +35,7 @@ async def run(self, session: AsyncSession) -> TaskInfo: error = task.errors[0].error if len(task.errors) > 0 else None # Get error info if any # Get URLs + # TODO: Revise to include URL Status from URL Web metadata urls = task.urls url_infos = [] for url in urls: @@ -43,7 +44,6 @@ async def run(self, session: AsyncSession) -> TaskInfo: batch_id=url.batch.id, url=url.url, collector_metadata=url.collector_metadata, - status=URLStatus(url.status), updated_at=url.updated_at ) url_infos.append(url_info) diff --git a/src/api/endpoints/url/get/dto.py b/src/api/endpoints/url/get/dto.py index a4616d7e..6eee6e51 100644 --- a/src/api/endpoints/url/get/dto.py +++ b/src/api/endpoints/url/get/dto.py @@ -5,6 +5,7 @@ from src.collectors.enums import URLStatus from src.db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource, TaskType +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum class GetURLsResponseErrorInfo(BaseModel): @@ -25,7 +26,7 @@ class GetURLsResponseInnerInfo(BaseModel): id: int batch_id: int | None url: str - status: URLStatus + status: URLStatusViewEnum | None collector_metadata: dict | None updated_at: datetime.datetime created_at: datetime.datetime diff --git a/src/api/endpoints/url/get/query.py b/src/api/endpoints/url/get/query.py index 6cfbc5ce..a11bbd64 100644 --- a/src/api/endpoints/url/get/query.py +++ b/src/api/endpoints/url/get/query.py @@ -6,13 +6,11 @@ from sqlalchemy.orm import selectinload from src.api.endpoints.url.get.dto import GetURLsResponseInfo, GetURLsResponseErrorInfo, GetURLsResponseInnerInfo -from src.collectors.enums import URLStatus from src.db.client.helpers import add_standard_limit_and_offset from src.db.models.impl import LinkBatchURL from 
src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.queries.base.builder import QueryBuilderBase @@ -36,7 +34,7 @@ async def run(self, session: AsyncSession) -> GetURLsResponseInfo: func.array_agg( aggregate_order_by( func.jsonb_build_object( - "type", URLTaskError.task_type, + "task_type", URLTaskError.task_type, "error", URLTaskError.error, "created_at", URLTaskError.created_at ), @@ -63,14 +61,12 @@ async def run(self, session: AsyncSession) -> GetURLsResponseInfo: URL.name, error_cte.c.error_array ) - .join( - URLWebMetadata - ) .outerjoin( LinkBatchURL ) - .join( - URLStatusMaterializedView + .outerjoin( + URLStatusMaterializedView, + URLStatusMaterializedView.url_id == URL.id ) .outerjoin( error_cte, @@ -79,25 +75,23 @@ async def run(self, session: AsyncSession) -> GetURLsResponseInfo: .outerjoin( URLScrapeInfo ) + .order_by(URL.id) ) - statement = select(URL).options( - selectinload(URL.task_errors), - selectinload(URL.batch) - ).order_by(URL.id) if self.errors: # Only return URLs with errors - statement = statement.where( + query = query.where( exists( select(URLTaskError).where(URLTaskError.url_id == URL.id) ) ) - add_standard_limit_and_offset(statement, self.page) + add_standard_limit_and_offset(query, self.page) mappings: Sequence[RowMapping] = await self.sh.mappings(session, query) final_results = [] for mapping in mappings: error_results = [] - for error in mapping["error_array"]: + error_array = mapping["error_array"] or [] + for error in error_array: error_result = GetURLsResponseErrorInfo( task=error["task_type"], error=error["error"], @@ -108,7 +102,7 @@ async def run(self, session: AsyncSession) -> GetURLsResponseInfo: 
GetURLsResponseInnerInfo( id=mapping[URL.id], batch_id=mapping[LinkBatchURL.batch_id], - url=mapping[URL.full_url], + url=mapping["full_url"], collector_metadata=mapping[URL.collector_metadata], status=mapping[URLStatusMaterializedView.status], created_at=mapping[URL.created_at], diff --git a/src/collectors/queries/insert/url.py b/src/collectors/queries/insert/url.py index 60f39a2c..3b21d210 100644 --- a/src/collectors/queries/insert/url.py +++ b/src/collectors/queries/insert/url.py @@ -22,7 +22,6 @@ async def run(self, session: AsyncSession) -> int: url=url_and_scheme.url.rstrip('/'), scheme=url_and_scheme.scheme, collector_metadata=self.url_info.collector_metadata, - status=self.url_info.status.value, source=self.url_info.source, trailing_slash=url_and_scheme.url.endswith('/'), ) diff --git a/src/core/core.py b/src/core/core.py index ad2f20d5..cbee2d84 100644 --- a/src/core/core.py +++ b/src/core/core.py @@ -31,7 +31,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType from src.db.models.impl.batch.pydantic.info import BatchInfo -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum class AsyncCore: @@ -81,7 +81,7 @@ async def get_duplicate_urls_by_batch(self, batch_id: int, page: int = 1) -> Get async def get_batch_statuses( self, collector_type: CollectorType | None, - status: BatchURLStatusEnum | None, + status: BatchURLStatusViewEnum | None, page: int ) -> GetBatchSummariesResponse: results = await self.adb_client.get_batch_summaries( diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py index 960f36ad..487850dd 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py @@ -126,7 
+126,7 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], # TODO: Change to convert web metadata result to URL Status url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URLWebMetadata.status_code], + mapping[URLWebMetadata.status_code], ), internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py index a710b6f7..8b23f339 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py @@ -12,6 +12,7 @@ from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel @@ -41,7 +42,7 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: # Required URL.full_url, URL.name, - URL.status, + URLWebMetadata.status_code, URLRecordType.record_type, agency_id_cte.c.agency_ids, # Optional @@ -82,6 +83,10 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: URLRecordType, URLRecordType.url_id == URL.id, ) + .outerjoin( + URLWebMetadata, + URLWebMetadata.url_id == URL.id, + ) .outerjoin( agency_id_cte, cte.url_id == agency_id_cte.c.url_id @@ -122,7 +127,7 @@ async def run(self, session: AsyncSession) -> 
UpdateDataSourcesOuterRequest: access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], data_portal_type_other=mapping[URLOptionalDataSourceMetadata.data_portal_type_other], url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URL.status], + mapping[URLWebMetadata.status_code], ), internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py index 5a784295..02ff8c8f 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py @@ -10,6 +10,7 @@ from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel from src.external.pdap.impl.sync.meta_urls.add.request import AddMetaURLsOuterRequest, AddMetaURLsInnerRequest @@ -36,7 +37,7 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: select( cte.url_id, URL.full_url, - URL.status, + URLWebMetadata.status_code, URLInternetArchivesProbeMetadata.archive_url, agency_id_cte.c.agency_ids ) @@ -47,6 +48,10 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: URL, URL.id == cte.url_id, ) + .join( + URLWebMetadata, + URL.id == URLWebMetadata.url_id, + ) .outerjoin( URLInternetArchivesProbeMetadata, URL.id == URLInternetArchivesProbeMetadata.url_id, @@ -73,7 +78,7 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: agency_ids=mapping["agency_ids"], 
internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URL.status], + mapping[URLWebMetadata.status_code], ), ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py index 8cdb8ed6..c73909dc 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py @@ -10,6 +10,7 @@ from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest, UpdateMetaURLsInnerRequest @@ -35,7 +36,7 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: select( cte.ds_meta_url_id, URL.full_url, - URL.status, + URLWebMetadata.status_code, agency_id_cte.c.agency_ids, URLInternetArchivesProbeMetadata.archive_url, ) @@ -50,6 +51,10 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: URLInternetArchivesProbeMetadata, URL.id == URLInternetArchivesProbeMetadata.url_id, ) + .outerjoin( + URLWebMetadata, + URL.id == URLWebMetadata.url_id, + ) .outerjoin( agency_id_cte, cte.url_id == agency_id_cte.c.url_id @@ -72,7 +77,7 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: agency_ids=mapping["agency_ids"] or [], internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, 
url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URL.status], + mapping[URLWebMetadata.status_code], ), ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py index fb425e0e..3de3e502 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py @@ -1,5 +1,3 @@ -from src.collectors.enums import URLStatus -from src.db.models.materialized_views.url_status.enums import URLStatusEnum from src.external.pdap.enums import DataSourcesURLStatus diff --git a/src/core/tasks/url/operators/html/core.py b/src/core/tasks/url/operators/html/core.py index 26f70cdb..5983ab69 100644 --- a/src/core/tasks/url/operators/html/core.py +++ b/src/core/tasks/url/operators/html/core.py @@ -1,6 +1,7 @@ from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.core.tasks.url.operators.html.filter import filter_just_urls, filter_404_subset from src.core.tasks.url.operators.html.queries.insert.query import InsertURLHTMLInfoQueryBuilder +from src.core.tasks.url.operators.html.queries.prerequisites import PendingURLsWithoutHTMLDataPrerequisitesQueryBuilder from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser from src.core.tasks.url.operators.html.tdo import UrlHtmlTDO from src.db.client.async_ import AsyncDatabaseClient @@ -26,7 +27,9 @@ def task_type(self) -> TaskType: return TaskType.HTML async def meets_task_prerequisites(self) -> bool: - return await self.adb_client.has_non_errored_urls_without_html_data() + return await self.run_query_builder( + PendingURLsWithoutHTMLDataPrerequisitesQueryBuilder() + ) async def inner_task_logic(self) -> None: tdos = await self._get_non_errored_urls_without_html_data() diff --git a/src/db/models/views/batch_url_status/__init__.py b/src/core/tasks/url/operators/html/queries/get/__init__.py similarity index 100% rename from 
src/db/models/views/batch_url_status/__init__.py rename to src/core/tasks/url/operators/html/queries/get/__init__.py diff --git a/src/core/tasks/url/operators/html/queries/get.py b/src/core/tasks/url/operators/html/queries/get/query.py similarity index 68% rename from src/core/tasks/url/operators/html/queries/get.py rename to src/core/tasks/url/operators/html/queries/get/query.py index 2a2b39d7..a4088157 100644 --- a/src/core/tasks/url/operators/html/queries/get.py +++ b/src/core/tasks/url/operators/html/queries/get/query.py @@ -1,6 +1,7 @@ from sqlalchemy import RowMapping, Sequence from sqlalchemy.ext.asyncio import AsyncSession +from src.core.tasks.url.operators.html.queries.helpers import has_non_errored_urls_without_html_data from src.db.models.impl import LinkBatchURL from src.db.models.impl.url.core.pydantic.info import URLInfo from src.db.models.impl.url.core.sqlalchemy import URL @@ -13,7 +14,7 @@ class GetPendingURLsWithoutHTMLDataQueryBuilder(QueryBuilderBase): async def run(self, session: AsyncSession) -> list[URLInfo]: query = ( - StatementComposer.has_non_errored_urls_without_html_data() + has_non_errored_urls_without_html_data() .limit(100) .order_by(URL.id) ) @@ -24,13 +25,7 @@ async def run(self, session: AsyncSession) -> list[URLInfo]: for mapping in mappings: url_info = URLInfo( id=mapping[URL.id], - batch_id=mapping[LinkBatchURL.batch_id], - url=mapping[URL.full_url], - collector_metadata=mapping[URL.collector_metadata], - status=mapping[URLStatusMaterializedView.status], - created_at=mapping[URL.created_at], - updated_at=mapping[URL.updated_at], - name=mapping[URL.name] + url=mapping["full_url"], ) final_results.append(url_info) diff --git a/src/core/tasks/url/operators/html/queries/helpers.py b/src/core/tasks/url/operators/html/queries/helpers.py new file mode 100644 index 00000000..4c7eb89c --- /dev/null +++ b/src/core/tasks/url/operators/html/queries/helpers.py @@ -0,0 +1,51 @@ +from sqlalchemy import ColumnElement, exists, select, Select + 
+from src.db.enums import TaskType +from src.db.models.impl import LinkBatchURL +from src.db.models.impl.link.task_url import LinkTaskURL +from src.db.models.impl.task.core import Task +from src.db.models.impl.task.enums import TaskStatus +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView + + +def _exclude_completed_html_task_subquery() -> ColumnElement[bool]: + return ~exists( + select(1) + .select_from( + LinkTaskURL + ) + .join( + Task, + LinkTaskURL.task_id == Task.id + ) + .where( + LinkTaskURL.url_id == URL.id, + Task.task_type == TaskType.HTML.value, + Task.task_status == TaskStatus.COMPLETE.value + ) + ) + +def has_non_errored_urls_without_html_data() -> Select: + query = ( + select( + URL.id, + URL.full_url, + ) + .join( + URLWebMetadata, + URLWebMetadata.url_id == URL.id + ) + .outerjoin( + URLScrapeInfo + ) + .where( + URLScrapeInfo.url_id == None, + _exclude_completed_html_task_subquery, + URLWebMetadata.status_code == 200, + URLWebMetadata.content_type.like("%html%"), + ) + ) + return query diff --git a/src/core/tasks/url/operators/html/queries/prerequisites.py b/src/core/tasks/url/operators/html/queries/prerequisites.py new file mode 100644 index 00000000..5fa0c94a --- /dev/null +++ b/src/core/tasks/url/operators/html/queries/prerequisites.py @@ -0,0 +1,13 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.html.queries.helpers import has_non_errored_urls_without_html_data +from src.db.queries.base.builder import QueryBuilderBase + + +class PendingURLsWithoutHTMLDataPrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + statement = has_non_errored_urls_without_html_data() + statement = statement.limit(1) + scalar_result = 
await session.scalars(statement) + return bool(scalar_result.first()) \ No newline at end of file diff --git a/src/db/client/async_.py b/src/db/client/async_.py index f988413f..6377fa60 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -36,13 +36,13 @@ from src.api.endpoints.task.dtos.get.tasks import GetTasksResponse, GetTasksResponseTaskInfo from src.api.endpoints.url.get.dto import GetURLsResponseInfo from src.api.endpoints.url.get.query import GetURLsQueryBuilder -from src.collectors.enums import URLStatus, CollectorType +from src.collectors.enums import CollectorType from src.collectors.queries.insert.urls.query import InsertURLsQueryBuilder from src.core.enums import BatchStatus, RecordType from src.core.env_var_manager import EnvVarManager from src.core.tasks.scheduled.impl.huggingface.queries.state import SetHuggingFaceUploadStateQueryBuilder from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo -from src.core.tasks.url.operators.html.queries.get import \ +from src.core.tasks.url.operators.html.queries.get.query import \ GetPendingURLsWithoutHTMLDataQueryBuilder from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO from src.db.client.helpers import add_standard_limit_and_offset @@ -83,7 +83,7 @@ from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.models.templates_.base import Base -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.get.recent_batch_summaries.builder import GetRecentBatchSummariesQueryBuilder from src.db.queries.implementations.core.metrics.urls.aggregated.pending import \ @@ -321,14 +321,6 @@ async def 
add_user_record_type_suggestion( # endregion record_type - - @session_manager - async def has_non_errored_urls_without_html_data(self, session: AsyncSession) -> bool: - statement = self.statement_composer.has_non_errored_urls_without_html_data() - statement = statement.limit(1) - scalar_result = await session.scalars(statement) - return bool(scalar_result.first()) - @session_manager async def add_miscellaneous_metadata(self, session: AsyncSession, tdos: list[URLMiscellaneousMetadataTDO]): updates = [] @@ -700,7 +692,7 @@ async def get_batch_summaries( session, page: int, collector_type: CollectorType | None = None, - status: BatchURLStatusEnum | None = None, + status: BatchURLStatusViewEnum | None = None, ) -> GetBatchSummariesResponse: # Get only the batch_id, collector_type, status, and created_at builder = GetRecentBatchSummariesQueryBuilder( diff --git a/src/db/client/sync.py b/src/db/client/sync.py index e29909cf..c5d90167 100644 --- a/src/db/client/sync.py +++ b/src/db/client/sync.py @@ -124,7 +124,6 @@ def insert_url(self, session, url_info: URLInfo) -> int: url=url_and_scheme.url, scheme=url_and_scheme.scheme, collector_metadata=url_info.collector_metadata, - status=url_info.status, name=url_info.name, trailing_slash=url_and_scheme.url.endswith('/'), source=url_info.source diff --git a/src/db/models/impl/url/core/pydantic/insert.py b/src/db/models/impl/url/core/pydantic/insert.py index ed73b6c1..33842d53 100644 --- a/src/db/models/impl/url/core/pydantic/insert.py +++ b/src/db/models/impl/url/core/pydantic/insert.py @@ -17,6 +17,5 @@ def sa_model(cls) -> type[Base]: scheme: str | None = None collector_metadata: dict | None = None name: str | None = None - status: URLStatus = URLStatus.OK source: URLSource trailing_slash: bool \ No newline at end of file diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index 45e8b45b..35178505 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ 
b/src/db/models/impl/url/core/sqlalchemy.py @@ -39,11 +39,6 @@ class URL(UpdatedAtMixin, CreatedAtMixin, WithIDBase): # The metadata from the collector collector_metadata = Column(JSON) # The outcome of the URL: submitted, human_labeling, rejected, duplicate, etc. - status: Mapped[URLStatus] = enum_column( - URLStatus, - name='url_status', - nullable=False - ) trailing_slash = Column(Boolean, nullable=False) @hybrid_property diff --git a/src/db/models/views/batch_url_status/core.py b/src/db/models/materialized_views/batch_url_status/core.py similarity index 98% rename from src/db/models/views/batch_url_status/core.py rename to src/db/models/materialized_views/batch_url_status/core.py index 1ec0711d..12d2872e 100644 --- a/src/db/models/views/batch_url_status/core.py +++ b/src/db/models/materialized_views/batch_url_status/core.py @@ -66,7 +66,7 @@ from src.db.models.templates_.base import Base -class BatchURLStatusMatView( +class BatchURLStatusMaterializedView( Base, ViewMixin, BatchDependentMixin diff --git a/src/db/models/materialized_views/batch_url_status/enums.py b/src/db/models/materialized_views/batch_url_status/enums.py index 033d2c14..2ce74325 100644 --- a/src/db/models/materialized_views/batch_url_status/enums.py +++ b/src/db/models/materialized_views/batch_url_status/enums.py @@ -1,8 +1,8 @@ from enum import Enum -class BatchURLStatusEnum(Enum): +class BatchURLStatusViewEnum(Enum): ERROR = "Error" - UNLABELED_URLS = "Has Unlabeled URLs" NO_URLS = "No URLs" - LABELING_COMPLETE = "Labeling Complete" \ No newline at end of file + LABELING_COMPLETE = "Labeling Complete" + HAS_UNLABELED_URLS = "Has Unlabeled URLs" \ No newline at end of file diff --git a/src/db/models/materialized_views/batch_url_status/sqlalchemy.py b/src/db/models/materialized_views/batch_url_status/sqlalchemy.py deleted file mode 100644 index 7c93cfc6..00000000 --- a/src/db/models/materialized_views/batch_url_status/sqlalchemy.py +++ /dev/null @@ -1,14 +0,0 @@ -from sqlalchemy.orm import 
Mapped - -from src.db.models.mixins import ViewMixin, BatchDependentMixin -from src.db.models.templates_.base import Base - - -class BatchURLStatusMaterializedView( - Base, - ViewMixin, - BatchDependentMixin -): - - __tablename__ = "batch_url_status_mat_view" - batch_url_status: Mapped[str] \ No newline at end of file diff --git a/src/db/models/materialized_views/url_status/enums.py b/src/db/models/materialized_views/url_status/enums.py index ebb0c2a1..a467a33d 100644 --- a/src/db/models/materialized_views/url_status/enums.py +++ b/src/db/models/materialized_views/url_status/enums.py @@ -1,20 +1,10 @@ -""" +from enum import Enum -ASE status_text.status - WHEN 'Intake'::text THEN 100 - WHEN 'Error'::text THEN 110 - WHEN 'Community Labeling'::text THEN 200 - WHEN 'Accepted'::text THEN 300 - WHEN 'Awaiting Submission'::text THEN 380 - WHEN 'Submitted'::text THEN 390 -""" -from sqlalchemy import Enum - -class URLStatusEnum(Enum): +class URLStatusViewEnum(Enum): INTAKE = "Intake" - ERROR = "Error" - COMMUNITY_LABELING = "Community Labeling" ACCEPTED = "Accepted" AWAITING_SUBMISSION = "Awaiting Submission" - SUBMITTED = "Submitted" \ No newline at end of file + SUBMITTED = "Submitted" + ERROR = "Error" + COMMUNITY_LABELING = "Community Labeling" \ No newline at end of file diff --git a/src/db/models/views/batch_url_status/enums.py b/src/db/models/views/batch_url_status/enums.py deleted file mode 100644 index 2f524de4..00000000 --- a/src/db/models/views/batch_url_status/enums.py +++ /dev/null @@ -1,8 +0,0 @@ -from enum import Enum - - -class BatchURLStatusEnum(Enum): - ERROR = "Error" - NO_URLS = "No URLs" - LABELING_COMPLETE = "Labeling Complete" - HAS_UNLABELED_URLS = "Has Unlabeled URLs" \ No newline at end of file diff --git a/src/db/models/views/url_status/__init__.py b/src/db/models/views/url_status/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/db/models/views/url_status/core.py b/src/db/models/views/url_status/core.py deleted file 
mode 100644 index be771fe5..00000000 --- a/src/db/models/views/url_status/core.py +++ /dev/null @@ -1,72 +0,0 @@ -""" - CREATE MATERIALIZED VIEW url_status_mat_view AS - with - urls_with_relevant_errors as ( - select - ute.url_id - from - url_task_error ute - where - ute.task_type in ( - 'Screenshot', - 'HTML', - 'URL Probe' - ) - ) - select - u.id as url_id, - case - when ( - -- Validated as not relevant, individual record, or not found - fuv.type in ('not relevant', 'individual record', 'not found') - -- Has Meta URL in data sources app - OR udmu.url_id is not null - -- Has data source in data sources app - OR uds.url_id is not null - ) Then 'Submitted/Pipeline Complete' - when fuv.type is not null THEN 'Accepted' - when ( - -- Has compressed HTML - uch.url_id is not null - AND - -- Has web metadata - uwm.url_id is not null - AND - -- Has screenshot - us.url_id is not null - ) THEN 'Community Labeling' - when uwre.url_id is not null then 'Error' - ELSE 'Intake' - END as status - - from - urls u - left join urls_with_relevant_errors uwre - on u.id = uwre.url_id - left join url_screenshot us - on u.id = us.url_id - left join url_compressed_html uch - on u.id = uch.url_id - left join url_web_metadata uwm - on u.id = uwm.url_id - left join flag_url_validated fuv - on u.id = fuv.url_id - left join url_ds_meta_url udmu - on u.id = udmu.url_id - left join url_data_source uds - on u.id = uds.url_id -""" -from sqlalchemy import String, Column - -from src.db.models.helpers import url_id_primary_key_constraint -from src.db.models.mixins import ViewMixin, URLDependentMixin, URLDependentViewMixin -from src.db.models.templates_.base import Base - - -class URLStatusMatView( - Base, - URLDependentViewMixin -): - __tablename__ = "url_status_mat_view" - - status = Column(String) \ No newline at end of file diff --git a/src/db/models/views/url_status/enums.py b/src/db/models/views/url_status/enums.py deleted file mode 100644 index a467a33d..00000000 --- 
a/src/db/models/views/url_status/enums.py +++ /dev/null @@ -1,10 +0,0 @@ -from enum import Enum - - -class URLStatusViewEnum(Enum): - INTAKE = "Intake" - ACCEPTED = "Accepted" - AWAITING_SUBMISSION = "Awaiting Submission" - SUBMITTED = "Submitted" - ERROR = "Error" - COMMUNITY_LABELING = "Community Labeling" \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py index 5de2eb55..f5696e7e 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py @@ -5,10 +5,9 @@ from src.api.endpoints.batch.dtos.get.summaries.counts import BatchSummaryURLCounts from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary from src.collectors.enums import CollectorType -from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.views.batch_url_status.core import BatchURLStatusMatView -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.core import BatchURLStatusMaterializedView +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.builder import URLCountsCTEQueryBuilder from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.labels import URLCountsLabels @@ -20,7 +19,7 @@ def __init__( self, page: int = 1, collector_type: CollectorType | None = None, - status: BatchURLStatusEnum | None = None, + status: BatchURLStatusViewEnum | None = None, batch_id: int | None = None, ): super().__init__() @@ -41,7 +40,7 @@ async def run(self, session: AsyncSession) -> list[BatchSummary]: *builder.get_all(), Batch.strategy, 
Batch.status, - BatchURLStatusMatView.batch_url_status, + BatchURLStatusMaterializedView.batch_url_status, Batch.parameters, Batch.user_id, Batch.compute_time, @@ -50,8 +49,8 @@ async def run(self, session: AsyncSession) -> list[BatchSummary]: builder.query, builder.get(count_labels.batch_id) == Batch.id, ).outerjoin( - BatchURLStatusMatView, - BatchURLStatusMatView.batch_id == Batch.id, + BatchURLStatusMaterializedView, + BatchURLStatusMaterializedView.batch_id == Batch.id, ).order_by( Batch.id.asc() ) @@ -75,7 +74,6 @@ async def run(self, session: AsyncSession) -> list[BatchSummary]: date_generated=row.date_generated, url_counts=BatchSummaryURLCounts( total=row[count_labels.total], - duplicate=row[count_labels.duplicate], not_relevant=row[count_labels.not_relevant], submitted=row[count_labels.submitted], errored=row[count_labels.error], diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py index 9eb9ef4c..7192f1fa 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py @@ -3,8 +3,8 @@ from src.collectors.enums import CollectorType from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.views.batch_url_status.core import BatchURLStatusMatView -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.core import BatchURLStatusMaterializedView +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.helpers import add_page_offset from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.all import ALL_CTE @@ -21,7 +21,7 @@ def __init__( self, page: int = 1, collector_type: 
CollectorType | None = None, - status: BatchURLStatusEnum | None = None, + status: BatchURLStatusViewEnum | None = None, batch_id: int | None = None ): super().__init__(URLCountsLabels()) @@ -44,8 +44,8 @@ def get_core_query(self): ) .select_from(Batch) .join( - BatchURLStatusMatView, - BatchURLStatusMatView.batch_id == Batch.id, + BatchURLStatusMaterializedView, + BatchURLStatusMaterializedView.batch_id == Batch.id, ) ) for cte in [SUBMITTED_CTE, PENDING_CTE, ALL_CTE, NOT_RELEVANT_CTE, ERROR_CTE]: @@ -78,4 +78,4 @@ def apply_collector_type_filter(self, query: Select): def apply_status_filter(self, query: Select): if self.status is None: return query - return query.where(BatchURLStatusMatView.batch_url_status == self.status.value) + return query.where(BatchURLStatusMaterializedView.batch_url_status == self.status.value) diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/labels.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/labels.py index c55d8f45..72806c13 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/labels.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/labels.py @@ -11,6 +11,5 @@ class URLCountsLabels(LabelsBase): submitted: str = "count_submitted" not_relevant: str = "count_not_relevant" error: str = "count_error" - duplicate: str = "count_duplicate" diff --git a/src/db/statement_composer.py b/src/db/statement_composer.py index 512062a6..6834ee1e 100644 --- a/src/db/statement_composer.py +++ b/src/db/statement_composer.py @@ -24,57 +24,6 @@ class StatementComposer: Assists in the composition of SQLAlchemy statements """ - @staticmethod - def exclude_completed_html_task_subquery() -> ColumnElement[bool]: - return ~exists( - select(1) - .select_from( - LinkTaskURL - ) - .join( - Task, - LinkTaskURL.task_id == Task.id - ) - .where( - LinkTaskURL.url_id == URL.id, - Task.task_type == TaskType.HTML.value, - Task.task_status 
== TaskStatus.COMPLETE.value - ) - ) - - @staticmethod - def has_non_errored_urls_without_html_data() -> Select: - query = ( - select( - URL.id, - LinkBatchURL.batch_id, - URL.full_url, - URL.collector_metadata, - URLStatusMaterializedView.status, - URL.created_at, - URL.updated_at, - URL.name - ) - .join( - URLWebMetadata - ) - .outerjoin( - LinkBatchURL - ) - .join( - URLStatusMaterializedView - ) - .outerjoin( - URLScrapeInfo - ) - .where( - URLScrapeInfo.url_id == None, - StatementComposer.exclude_completed_html_task_subquery, - URLWebMetadata.content_type.like("%html%"), - ) - ) - return query - @staticmethod def simple_count_subquery(model, attribute: str, label: str) -> Subquery: attr_value = getattr(model, attribute) diff --git a/tests/automated/integration/api/_helpers/RequestValidator.py b/tests/automated/integration/api/_helpers/RequestValidator.py index 0db00cb3..851e75fb 100644 --- a/tests/automated/integration/api/_helpers/RequestValidator.py +++ b/tests/automated/integration/api/_helpers/RequestValidator.py @@ -36,7 +36,7 @@ from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.core.enums import BatchStatus from src.db.enums import TaskType -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.util.helper_functions import update_if_not_none @@ -268,7 +268,7 @@ def delete( def get_batch_statuses( self, collector_type: CollectorType | None = None, - status: BatchURLStatusEnum | None = None, + status: BatchURLStatusViewEnum | None = None, ) -> GetBatchSummariesResponse: params = {} update_if_not_none( diff --git a/tests/automated/integration/api/batch/summaries/test_happy_path.py b/tests/automated/integration/api/batch/summaries/test_happy_path.py index 6af9ce2b..126f1118 100644 --- a/tests/automated/integration/api/batch/summaries/test_happy_path.py +++ 
b/tests/automated/integration/api/batch/summaries/test_happy_path.py @@ -68,7 +68,6 @@ async def test_get_batch_summaries(api_test_helper): assert counts_1.pending == 1 assert counts_1.submitted == 2 assert counts_1.not_relevant == 0 - assert counts_1.duplicate == 0 assert counts_1.errored == 0 result_2 = results[1] @@ -79,7 +78,6 @@ async def test_get_batch_summaries(api_test_helper): assert counts_2.errored == 0 assert counts_2.pending == 0 assert counts_2.submitted == 0 - assert counts_2.duplicate == 0 result_3 = results[2] assert result_3.id == batch_3_id @@ -89,4 +87,3 @@ async def test_get_batch_summaries(api_test_helper): assert counts_3.errored == 0 assert counts_3.pending == 7 assert counts_3.submitted == 1 - assert counts_3.duplicate == 7 diff --git a/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py b/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py index f4181629..7ebc4ccf 100644 --- a/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py +++ b/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py @@ -3,7 +3,7 @@ from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.dtos.url.mapping_.simple import SimpleURLMapping -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.core import DBDataCreator @@ -52,7 +52,7 @@ async def test_get_batch_summaries_pending_url_filter(api_test_helper): # Test filter for pending URLs and only retrieve the second batch pending_urls_results = ath.request_validator.get_batch_statuses( - status=BatchURLStatusEnum.HAS_UNLABELED_URLS + status=BatchURLStatusViewEnum.HAS_UNLABELED_URLS ) assert len(pending_urls_results.results) == 1 diff --git 
a/tests/automated/integration/api/metrics/batches/test_aggregated.py b/tests/automated/integration/api/metrics/batches/test_aggregated.py index 3d84d6d7..6142a345 100644 --- a/tests/automated/integration/api/metrics/batches/test_aggregated.py +++ b/tests/automated/integration/api/metrics/batches/test_aggregated.py @@ -25,12 +25,10 @@ async def test_get_batches_aggregated_metrics( ) url_mappings_broken: list[SimpleURLMapping] = await create_urls( adb_client=adb_client, - status=URLStatus.BROKEN, count=4, ) url_mappings_ok: list[SimpleURLMapping] = await create_urls( adb_client=adb_client, - status=URLStatus.OK, count=11, ) url_mappings_all: list[SimpleURLMapping] = url_mappings_broken + url_mappings_ok diff --git a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py index e203b722..224e7d33 100644 --- a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py +++ b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py @@ -46,7 +46,7 @@ async def test_get_urls_aggregated_metrics(api_test_helper): batch_2: int = await ddc.create_batch( strategy=CollectorType.AUTO_GOOGLER, ) - url_mappings_2_ok: list[SimpleURLMapping] = await ddc.create_urls(batch_id=batch_2, count=4, status=URLStatus.OK) + url_mappings_2_ok: list[SimpleURLMapping] = await ddc.create_urls(batch_id=batch_2, count=4) url_mappings_2_validated: list[SimpleURLMapping] = await ddc.create_validated_urls(count=1, validation_type=URLType.DATA_SOURCE) url_mappings_2_not_relevant: list[SimpleURLMapping] = await ddc.create_validated_urls(count=5, validation_type=URLType.NOT_RELEVANT) url_ids_2_validated: list[int] = [url_mapping.url_id for url_mapping in url_mappings_2_validated] diff --git a/tests/automated/integration/api/submit/data_source/test_duplicate.py b/tests/automated/integration/api/submit/data_source/test_duplicate.py index ea16e1ec..37fb9703 100644 --- 
a/tests/automated/integration/api/submit/data_source/test_duplicate.py +++ b/tests/automated/integration/api/submit/data_source/test_duplicate.py @@ -7,6 +7,7 @@ from src.core.enums import RecordType from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -18,6 +19,7 @@ async def test_submit_data_source_duplicate( pittsburgh_locality: LocalityCreationInfo, test_url_data_source_mapping: SimpleURLMapping ): + await api_test_helper.adb_client().refresh_materialized_views() ath = api_test_helper try: @@ -34,5 +36,5 @@ async def test_submit_data_source_duplicate( model = SubmitDataSourceURLDuplicateSubmissionResponse(**response) assert model.url_id == test_url_data_source_mapping.url_id assert model.url_type == URLType.DATA_SOURCE - assert model.url_status == URLStatus.OK + assert model.url_status == URLStatusViewEnum.AWAITING_SUBMISSION assert model.message == "Duplicate URL found" diff --git a/tests/automated/integration/api/url/test_get.py b/tests/automated/integration/api/url/test_get.py index 8c95c670..d1607f7c 100644 --- a/tests/automated/integration/api/url/test_get.py +++ b/tests/automated/integration/api/url/test_get.py @@ -28,6 +28,7 @@ async def test_get_urls(api_test_helper: APITestHelper): # Add errors await db_data_creator.task_errors(url_ids=url_ids) + await api_test_helper.adb_client().refresh_materialized_views() data: GetURLsResponseInfo = api_test_helper.request_validator.get_urls() assert data.count == 3 diff --git a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index 22537d20..1f7836ae 100644 --- a/tests/automated/integration/conftest.py +++ b/tests/automated/integration/conftest.py @@ -245,7 +245,6 @@ async def 
test_url_id( url="example.com", source=URLSource.COLLECTOR, trailing_slash=False, - status=URLStatus.OK ) return await db_data_creator.adb_client.add(url, return_id=True) diff --git a/tests/automated/integration/db/structure/test_updated_at.py b/tests/automated/integration/db/structure/test_updated_at.py index 0a4c18a4..31d40dbd 100644 --- a/tests/automated/integration/db/structure/test_updated_at.py +++ b/tests/automated/integration/db/structure/test_updated_at.py @@ -14,7 +14,6 @@ async def test_updated_at(db_data_creator: DBDataCreator): _ = await db_data_creator.create_urls( count=1, - status=URLStatus.OK ) urls: list[URL] = await db_data_creator.adb_client.get_all(URL) diff --git a/tests/automated/integration/readonly/setup/data_source.py b/tests/automated/integration/readonly/setup/data_source.py index e22929ee..7c626d04 100644 --- a/tests/automated/integration/readonly/setup/data_source.py +++ b/tests/automated/integration/readonly/setup/data_source.py @@ -27,7 +27,6 @@ async def add_maximal_data_source( collector_metadata={ "url": "https://read-only.com/" }, - status=URLStatus.OK, source=URLSource.COLLECTOR, ) url_id: int = await adb_client.add(url, return_id=True) @@ -82,7 +81,6 @@ async def add_minimal_data_source( name="Minimal name", trailing_slash=False, collector_metadata={}, - status=URLStatus.OK, source=URLSource.ROOT_URL, ) url_id: int = await adb_client.add(url, return_id=True) diff --git a/tests/automated/integration/readonly/setup/meta_url.py b/tests/automated/integration/readonly/setup/meta_url.py index 837274bb..e1e32c7f 100644 --- a/tests/automated/integration/readonly/setup/meta_url.py +++ b/tests/automated/integration/readonly/setup/meta_url.py @@ -20,7 +20,6 @@ async def add_meta_url( collector_metadata={ "url": "https://read-only-meta-url.com/" }, - status=URLStatus.OK, source=URLSource.REDIRECT, ) url_id: int = await adb_client.add(url, return_id=True) diff --git 
a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py index 1d1085a5..a8a839d1 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py @@ -38,7 +38,6 @@ async def run(self, session: AsyncSession) -> list[int]: url = URL( url=get_test_url(i), scheme=None, - status=URLStatus.OK, name=name, description=description, source=URLSource.COLLECTOR, diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py index fa31dc40..f9faf657 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py @@ -79,7 +79,6 @@ async def test_add( assert content.access_notes is None assert content.access_types == [] assert content.data_portal_type_other is None - assert content.url_status == DataSourcesURLStatus.OK assert content.agency_ids == [test_agency_id] diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py index e63e1496..dcdfb56b 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py @@ -22,6 +22,11 @@ async def test_add( mock_pdap_client: PDAPClient, test_agency_id: int ): + await db_data_creator.create_web_metadata( + url_ids=[test_url_meta_url_id] + ) + + await db_data_creator.adb_client.refresh_materialized_views() operator = DSAppSyncMetaURLsAddTaskOperator( adb_client=adb_client_test, pdap_client=mock_pdap_client @@ -46,7 +51,6 @@ async def 
test_add( # Run task and confirm runs without error await run_task_and_confirm_success(operator) - # Confirm expected method was called with expected parameters request: AddMetaURLsOuterRequest = extract_and_validate_sync_request( mock_pdap_client, diff --git a/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py b/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py index cb70ff8c..4e5bb551 100644 --- a/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py +++ b/tests/automated/integration/tasks/scheduled/loader/test_happy_path.py @@ -2,7 +2,7 @@ from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader -NUMBER_OF_ENTRIES = 22 +NUMBER_OF_ENTRIES = 21 @pytest.mark.asyncio async def test_happy_path( diff --git a/tests/automated/integration/tasks/url/impl/html/check/__init__.py b/tests/automated/integration/tasks/url/impl/html/check/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/tasks/url/impl/html/check/manager.py b/tests/automated/integration/tasks/url/impl/html/check/manager.py deleted file mode 100644 index 56c721fa..00000000 --- a/tests/automated/integration/tasks/url/impl/html/check/manager.py +++ /dev/null @@ -1,70 +0,0 @@ -from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView -from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry -from tests.automated.integration.tasks.url.impl.html.setup.models.record import TestURLHTMLTaskSetupRecord - - -class TestURLHTMLTaskCheckManager: - - def __init__( - self, - adb_client: 
AsyncDatabaseClient, - records: list[TestURLHTMLTaskSetupRecord] - ): - self.adb_client = adb_client - self.records = records - self._id_to_entry = {record.url_id: record.entry for record in records} - - async def check(self): - await self._check_has_html() - await self._check_scrape_status() - await self._check_has_same_url_status() - await self._check_marked_as_404() - - async def _check_has_html(self) -> None: - urls_with_html = [ - record.url_id - for record in self.records - if record.entry.expected_result.has_html - ] - - compressed_html_list: list[URLCompressedHTML] = await self.adb_client.get_all(URLCompressedHTML) - assert len(compressed_html_list) == len(urls_with_html) - for compressed_html in compressed_html_list: - assert compressed_html.url_id in urls_with_html - - async def _check_scrape_status(self) -> None: - urls_with_scrape_status = [ - record.url_id - for record in self.records - if record.entry.expected_result.scrape_status is not None - ] - - url_scrape_info_list: list[URLScrapeInfo] = await self.adb_client.get_all(URLScrapeInfo) - assert len(url_scrape_info_list) == len(urls_with_scrape_status) - for url_scrape_info in url_scrape_info_list: - assert url_scrape_info.url_id in urls_with_scrape_status - entry = self._id_to_entry[url_scrape_info.url_id] - expected_scrape_status = entry.expected_result.scrape_status - assert url_scrape_info.status == expected_scrape_status - - async def _check_has_same_url_status(self): - urls: list[URLStatusMaterializedView] = await self.adb_client.get_all(URLStatusMaterializedView) - for url in urls: - entry: TestURLHTMLTaskSetupEntry = self._id_to_entry[url.id] - if entry.expected_result.web_metadata_status_marked_404: - continue - assert url.status == entry.url_info.status, f"URL {url.url} has outcome {url.status} instead of {entry.url_info.status}" - - async def _check_marked_as_404(self): - web_metadata_list: list[URLWebMetadata] = await self.adb_client.get_all( - URLWebMetadata - ) - for web_metadata in 
web_metadata_list: - entry = self._id_to_entry[web_metadata.url_id] - if entry.expected_result.web_metadata_status_marked_404: - assert web_metadata.status_code == 404, f"URL {entry.url_info.url} has status code {web_metadata.status_code} instead of 404" diff --git a/tests/automated/integration/tasks/url/impl/html/conftest.py b/tests/automated/integration/tasks/url/impl/html/conftest.py new file mode 100644 index 00000000..b73a93e5 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/conftest.py @@ -0,0 +1,28 @@ +import types + +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser +from src.db.client.async_ import AsyncDatabaseClient +from src.external.url_request.dtos.url_response import URLResponseInfo +from tests.automated.integration.tasks.url.impl.html.mocks.methods import mock_parse + + +class _MockURLRequestInterface: + + async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: + return [] + +@pytest.fixture +def operator( + adb_client_test: AsyncDatabaseClient +) -> URLHTMLTaskOperator: + html_parser = HTMLResponseParser() + html_parser.parse = types.MethodType(mock_parse, html_parser) + operator = URLHTMLTaskOperator( + adb_client=adb_client_test, + url_request_interface=_MockURLRequestInterface(), + html_parser=html_parser + ) + return operator \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/methods.py b/tests/automated/integration/tasks/url/impl/html/mocks/methods.py index d6799eea..5e2533d0 100644 --- a/tests/automated/integration/tasks/url/impl/html/mocks/methods.py +++ b/tests/automated/integration/tasks/url/impl/html/mocks/methods.py @@ -10,6 +10,3 @@ async def mock_parse(self, url: str, html_content: str, content_type: str) -> Re description="fake description", ) - -async def mock_get_from_cache(self, url: str) -> Optional[str]: - return None 
diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/__init__.py b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py deleted file mode 100644 index 49e6b1f3..00000000 --- a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py +++ /dev/null @@ -1,11 +0,0 @@ -from src.external.url_request.dtos.url_response import URLResponseInfo -from tests.automated.integration.tasks.url.impl.html.mocks.url_request_interface.setup import setup_url_to_response_info - - -class MockURLRequestInterface: - - def __init__(self): - self._url_to_response_info: dict[str, URLResponseInfo] = setup_url_to_response_info() - - async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: - return [self._url_to_response_info[url] for url in urls] \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py deleted file mode 100644 index c0dbef6a..00000000 --- a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py +++ /dev/null @@ -1,57 +0,0 @@ -from http import HTTPStatus - -from src.external.url_request.dtos.url_response import URLResponseInfo -from tests.automated.integration.tasks.url.impl.html.setup.data import TEST_ENTRIES -from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry, TestErrorType - - -def _get_success( - entry: TestURLHTMLTaskSetupEntry -) -> bool: - if entry.give_error is not None: - return False - return True - -def get_http_status( - entry: TestURLHTMLTaskSetupEntry -) -> HTTPStatus: - if 
entry.give_error is None: - return HTTPStatus.OK - if entry.give_error == TestErrorType.HTTP_404: - return HTTPStatus.NOT_FOUND - return HTTPStatus.INTERNAL_SERVER_ERROR - -def _get_content_type( - entry: TestURLHTMLTaskSetupEntry -) -> str | None: - if entry.give_error is not None: - return None - return "text/html" - -def _generate_test_html() -> str: - return """ - - - - Example HTML - - -

Example HTML

-

This is an example of HTML content.

- - - """ - -def setup_url_to_response_info( -) -> dict[str, URLResponseInfo]: - d = {} - for entry in TEST_ENTRIES: - response_info = URLResponseInfo( - success=_get_success(entry), - status=get_http_status(entry), - html=_generate_test_html() if _get_success(entry) else None, - content_type=_get_content_type(entry), - exception=None if _get_success(entry) else "Error" - ) - d[entry.url_info.url] = response_info - return d diff --git a/tests/automated/integration/tasks/url/impl/html/setup/__init__.py b/tests/automated/integration/tasks/url/impl/html/setup/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/tasks/url/impl/html/setup/data.py b/tests/automated/integration/tasks/url/impl/html/setup/data.py deleted file mode 100644 index a3a43f8b..00000000 --- a/tests/automated/integration/tasks/url/impl/html/setup/data.py +++ /dev/null @@ -1,94 +0,0 @@ -from http import HTTPStatus - -from src.collectors.enums import URLStatus -from src.db.models.impl.url.scrape_info.enums import ScrapeStatus -from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry, TestURLInfo, \ - TestWebMetadataInfo, ExpectedResult, TestErrorType - -TEST_ENTRIES = [ - # URLs that give 200s should be updated with the appropriate scrape status - # and their html should be stored - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="happy-path.com/pending", - status=URLStatus.OK - ), - web_metadata_info=TestWebMetadataInfo( - accessed=True, - content_type="text/html", - response_code=HTTPStatus.OK, - error_message=None - ), - expected_result=ExpectedResult( - has_html=True, # Test for both compressed HTML and content metadata - scrape_status=ScrapeStatus.SUCCESS - ) - ), - # URLs that give 404s should be updated with the appropriate scrape status - # and their web metadata status should be updated to 404 - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="not-found-path.com/submitted", - 
status=URLStatus.OK - ), - web_metadata_info=TestWebMetadataInfo( - accessed=True, - content_type="text/html", - response_code=HTTPStatus.OK, - error_message=None - ), - give_error=TestErrorType.HTTP_404, - expected_result=ExpectedResult( - has_html=False, - scrape_status=ScrapeStatus.ERROR, - web_metadata_status_marked_404=True - ) - ), - # URLs that give errors should be updated with the appropriate scrape status - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="error-path.com/submitted", - status=URLStatus.OK - ), - web_metadata_info=TestWebMetadataInfo( - accessed=True, - content_type="text/html", - response_code=HTTPStatus.OK, - error_message=None - ), - give_error=TestErrorType.SCRAPER, - expected_result=ExpectedResult( - has_html=False, - scrape_status=ScrapeStatus.ERROR - ) - ), - # URLs with non-200 web metadata should not be processed - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="not-200-path.com/submitted", - status=URLStatus.OK - ), - web_metadata_info=TestWebMetadataInfo( - accessed=True, - content_type="text/html", - response_code=HTTPStatus.PERMANENT_REDIRECT, - error_message=None - ), - expected_result=ExpectedResult( - has_html=False, - scrape_status=None - ) - ), - # URLs with no web metadata should not be processed - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="no-web-metadata.com/submitted", - status=URLStatus.OK - ), - web_metadata_info=None, - expected_result=ExpectedResult( - has_html=False, - scrape_status=None - ) - ) -] \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/setup/manager.py b/tests/automated/integration/tasks/url/impl/html/setup/manager.py deleted file mode 100644 index e01f7b6d..00000000 --- a/tests/automated/integration/tasks/url/impl/html/setup/manager.py +++ /dev/null @@ -1,79 +0,0 @@ -import types - -from src.core.enums import RecordType -from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator -from 
src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser -from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.core.enums import URLSource -from src.db.models.impl.url.core.pydantic.insert import URLInsertModel -from src.db.models.impl.url.web_metadata.insert import URLWebMetadataPydantic -from tests.automated.integration.tasks.url.impl.html.mocks.methods import mock_parse -from tests.automated.integration.tasks.url.impl.html.mocks.url_request_interface.core import MockURLRequestInterface -from tests.automated.integration.tasks.url.impl.html.setup.data import TEST_ENTRIES -from tests.automated.integration.tasks.url.impl.html.setup.models.record import TestURLHTMLTaskSetupRecord - - -class TestURLHTMLTaskSetupManager: - - def __init__(self, adb_client: AsyncDatabaseClient): - self.adb_client = adb_client - - - async def setup(self) -> list[TestURLHTMLTaskSetupRecord]: - - records = await self._setup_urls() - await self.setup_web_metadata(records) - return records - - async def _setup_urls(self) -> list[TestURLHTMLTaskSetupRecord]: - url_insert_models: list[URLInsertModel] = [] - for entry in TEST_ENTRIES: - url_insert_model = URLInsertModel( - status=entry.url_info.status, - url=entry.url_info.url, - name=f"Test for {entry.url_info.url}", - record_type=RecordType.RESOURCES, - source=URLSource.COLLECTOR, - trailing_slash=False - ) - url_insert_models.append(url_insert_model) - url_ids = await self.adb_client.bulk_insert(url_insert_models, return_ids=True) - - records = [] - for url_id, entry in zip(url_ids, TEST_ENTRIES): - record = TestURLHTMLTaskSetupRecord( - url_id=url_id, - entry=entry - ) - records.append(record) - return records - - async def setup_web_metadata( - self, - records: list[TestURLHTMLTaskSetupRecord] - ) -> None: - models = [] - for record in records: - entry = record.entry - web_metadata_info = entry.web_metadata_info - if web_metadata_info is None: - continue - model = URLWebMetadataPydantic( - 
url_id=record.url_id, - accessed=web_metadata_info.accessed, - status_code=web_metadata_info.response_code.value, - content_type=web_metadata_info.content_type, - error_message=web_metadata_info.error_message - ) - models.append(model) - await self.adb_client.bulk_insert(models) - -async def setup_operator() -> URLHTMLTaskOperator: - html_parser = HTMLResponseParser() - html_parser.parse = types.MethodType(mock_parse, html_parser) - operator = URLHTMLTaskOperator( - adb_client=AsyncDatabaseClient(), - url_request_interface=MockURLRequestInterface(), - html_parser=html_parser - ) - return operator diff --git a/tests/automated/integration/tasks/url/impl/html/setup/models/__init__.py b/tests/automated/integration/tasks/url/impl/html/setup/models/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/tasks/url/impl/html/setup/models/entry.py b/tests/automated/integration/tasks/url/impl/html/setup/models/entry.py deleted file mode 100644 index 287bb52c..00000000 --- a/tests/automated/integration/tasks/url/impl/html/setup/models/entry.py +++ /dev/null @@ -1,34 +0,0 @@ -from enum import Enum -from http import HTTPStatus - -from pydantic import BaseModel - -from src.collectors.enums import URLStatus -from src.db.models.impl.url.scrape_info.enums import ScrapeStatus - - -class TestErrorType(Enum): - SCRAPER = "scraper" - HTTP_404 = "http-404" - - -class TestWebMetadataInfo(BaseModel): - accessed: bool - content_type: str | None - response_code: HTTPStatus - error_message: str | None - -class TestURLInfo(BaseModel): - url: str - status: URLStatus - -class ExpectedResult(BaseModel): - has_html: bool - scrape_status: ScrapeStatus | None # Does not have scrape info if none - web_metadata_status_marked_404: bool = False - -class TestURLHTMLTaskSetupEntry(BaseModel): - url_info: TestURLInfo - web_metadata_info: TestWebMetadataInfo | None - give_error: TestErrorType | None = None - expected_result: ExpectedResult \ No newline at end 
of file diff --git a/tests/automated/integration/tasks/url/impl/html/setup/models/record.py b/tests/automated/integration/tasks/url/impl/html/setup/models/record.py deleted file mode 100644 index 022c9639..00000000 --- a/tests/automated/integration/tasks/url/impl/html/setup/models/record.py +++ /dev/null @@ -1,8 +0,0 @@ -from pydantic import BaseModel - -from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry - - -class TestURLHTMLTaskSetupRecord(BaseModel): - url_id: int - entry: TestURLHTMLTaskSetupEntry \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/test_200.py b/tests/automated/integration/tasks/url/impl/html/test_200.py new file mode 100644 index 00000000..886d4131 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_200.py @@ -0,0 +1,81 @@ +from http import HTTPStatus + +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.models.impl.url.scrape_info.enums import ScrapeStatus +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.external.url_request.dtos.url_response import URLResponseInfo +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + + +MOCK_HTML_CONTENT = """ + + + + Example HTML + + +

Example HTML

+

This is an example of HTML content.

+ + +""" + +class _MockURLRequestInterface: + + async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: + assert len(urls) == 1 + return [ + URLResponseInfo( + success=True, + status=HTTPStatus.OK, + exception=None, + html=MOCK_HTML_CONTENT, + content_type="text/html" + ) + ] + + +@pytest.mark.asyncio +async def test_200( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs that give 200s should be updated with the appropriate scrape status + and their html should be stored + """ + + await db_data_creator.create_web_metadata( + url_ids=[test_url_id], + status_code=200 + ) + + # Adjust Mock Request Interface to return a 200 with HTML content + operator.url_request_interface = _MockURLRequestInterface() + + await assert_prereqs_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + # Check for the presence of Compressed HTML Data + results: list[URLCompressedHTML] = await adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 1 + assert results[0].url_id == test_url_id + assert results[0].compressed_html is not None + + # Web Metadata should be unchanged + web_metadata: URLWebMetadata = (await adb_client_test.get_all(URLWebMetadata))[0] + assert web_metadata.status_code == 200 + + # Check that URLScrapeInfo is updated + scrape_info: URLScrapeInfo = (await adb_client_test.get_all(URLScrapeInfo))[0] + assert scrape_info.status == ScrapeStatus.SUCCESS \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/test_404.py b/tests/automated/integration/tasks/url/impl/html/test_404.py new file mode 100644 index 00000000..7057b70e --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_404.py @@ -0,0 +1,65 @@ +from http import HTTPStatus + +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import 
AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.models.impl.url.scrape_info.enums import ScrapeStatus +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.external.url_request.dtos.url_response import URLResponseInfo +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + +class _MockURLRequestInterface: + + async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: + assert len(urls) == 1 + return [ + URLResponseInfo( + success=False, + status=HTTPStatus.NOT_FOUND, + exception="Not Found" + ) + ] + + + +@pytest.mark.asyncio +async def test_404( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs that give 404s should be updated with the appropriate scrape status + and their web metadata status should be updated to 404 + """ + await db_data_creator.create_web_metadata( + url_ids=[test_url_id], + status_code=200 + ) + + + # Adjust Mock Request Interface to return a 404 + operator.url_request_interface = _MockURLRequestInterface() + + await assert_prereqs_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + + # Check for the absence of Compressed HTML Data + results: list[URLCompressedHTML] = await adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 0 + + # Web Metadata status should be updated from 200 to 404 + web_metadata: URLWebMetadata = (await adb_client_test.get_all(URLWebMetadata))[0] + assert web_metadata.status_code == 404 + + # Check that URLScrapeInfo is updated + scrape_info: URLScrapeInfo = (await adb_client_test.get_all(URLScrapeInfo))[0] + assert scrape_info.status == ScrapeStatus.ERROR \ No newline at end of 
file diff --git a/tests/automated/integration/tasks/url/impl/html/test_error.py b/tests/automated/integration/tasks/url/impl/html/test_error.py new file mode 100644 index 00000000..b00667ed --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_error.py @@ -0,0 +1,62 @@ +from http import HTTPStatus + +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.models.impl.url.scrape_info.enums import ScrapeStatus +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.external.url_request.dtos.url_response import URLResponseInfo +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + +class _MockURLRequestInterface: + + async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: + assert len(urls) == 1 + return [ + URLResponseInfo( + success=False, + status=HTTPStatus.INTERNAL_SERVER_ERROR, + exception="Mock Exception" + ) + ] + +@pytest.mark.asyncio +async def test_error( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs that give errors should be updated with the appropriate scrape status + """ + await db_data_creator.create_web_metadata( + url_ids=[test_url_id], + status_code=200 + ) + + + # Adjust Mock Request Interface to return a 500 error + operator.url_request_interface = _MockURLRequestInterface() + + await assert_prereqs_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + + # Check for the absence of Compressed HTML Data + results: list[URLCompressedHTML] = await 
adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 0 + + # Web Metadata should be unchanged + web_metadata: URLWebMetadata = (await adb_client_test.get_all(URLWebMetadata))[0] + assert web_metadata.status_code == 200 + + # Check that URLScrapeInfo is updated + scrape_info: URLScrapeInfo = (await adb_client_test.get_all(URLScrapeInfo))[0] + assert scrape_info.status == ScrapeStatus.ERROR \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py b/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py new file mode 100644 index 00000000..36149177 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py @@ -0,0 +1,27 @@ +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_no_web_metadata( + adb_client_test: AsyncDatabaseClient, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs with no web metadata should not be processed + """ + await assert_prereqs_not_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + # Check for the absence of Compressed HTML Data + results: list[URLCompressedHTML] = await adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 0 + diff --git a/tests/automated/integration/tasks/url/impl/html/test_non_200.py b/tests/automated/integration/tasks/url/impl/html/test_non_200.py new file mode 100644 index 00000000..0b80ba86 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_non_200.py @@ -0,0 +1,32 @@ +import pytest + +from 
src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_non_200( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs with non-200 web metadata should not be processed + """ + await db_data_creator.create_web_metadata( + url_ids=[test_url_id], + status_code=500 + ) + + await assert_prereqs_not_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + # Check for the absence of Compressed HTML Data + results: list[URLCompressedHTML] = await adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 0 \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/test_task.py b/tests/automated/integration/tasks/url/impl/html/test_task.py deleted file mode 100644 index e7462e65..00000000 --- a/tests/automated/integration/tasks/url/impl/html/test_task.py +++ /dev/null @@ -1,33 +0,0 @@ -import pytest - -from src.db.client.async_ import AsyncDatabaseClient -from src.db.enums import TaskType -from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_prereqs_met, \ - assert_task_ran_without_error -from tests.automated.integration.tasks.url.impl.html.check.manager import TestURLHTMLTaskCheckManager -from tests.automated.integration.tasks.url.impl.html.setup.manager import setup_operator, \ - TestURLHTMLTaskSetupManager - - -@pytest.mark.asyncio -async def test_url_html_task(adb_client_test: AsyncDatabaseClient): - setup = TestURLHTMLTaskSetupManager(adb_client_test) - - operator = await setup_operator() - - # No URLs 
were created, the prereqs should not be met - await assert_prereqs_not_met(operator) - - records = await setup.setup() - await assert_prereqs_met(operator) - - run_info = await operator.run_task() - assert_task_ran_without_error(run_info) - - checker = TestURLHTMLTaskCheckManager( - adb_client=adb_client_test, - records=records - ) - await checker.check() - - await assert_prereqs_not_met(operator) diff --git a/tests/automated/integration/tasks/url/impl/probe/check/manager.py b/tests/automated/integration/tasks/url/impl/probe/check/manager.py index 40111201..9dd7f13d 100644 --- a/tests/automated/integration/tasks/url/impl/probe/check/manager.py +++ b/tests/automated/integration/tasks/url/impl/probe/check/manager.py @@ -1,12 +1,10 @@ from sqlalchemy import select -from src.collectors.enums import URLStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL -from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from src.db.models.materialized_views.url_status.enums import URLStatusEnum from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum class TestURLProbeCheckManager: @@ -20,13 +18,13 @@ def __init__( async def check_url( self, url_id: int, - expected_status: URLStatusEnum + expected_status: URLStatusViewEnum ): url: URLStatusMaterializedView = await self.adb_client.one_or_none( - statement=select(URLStatusMaterializedView).where(URLStatusMaterializedView.id == url_id) + statement=select(URLStatusMaterializedView).where(URLStatusMaterializedView.url_id == url_id) ) assert url is not None - assert url.status == expected_status + assert url.status == expected_status.value async def check_web_metadata( self, diff --git 
a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py index 85dd71f5..17b80c50 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py @@ -3,6 +3,7 @@ from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @@ -32,19 +33,15 @@ async def test_url_probe_task_error( ) ) assert not await operator.meets_task_prerequisites() - url_id: int = await setup_manager.setup_url(URLStatus.OK) + url_id: int = await setup_manager.setup_url() await db_data_creator.create_validated_flags([url_id], validation_type=URLType.DATA_SOURCE) await db_data_creator.create_url_data_sources([url_id]) assert await operator.meets_task_prerequisites() + run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert not await operator.meets_task_prerequisites() - await check_manager.check_url( - url_id=url_id, - expected_status=URLStatus.OK - ) - await check_manager.check_web_metadata( url_id=url_id, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py index 31216e23..d6d5e4d2 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py @@ -2,6 
+2,7 @@ from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @@ -32,16 +33,13 @@ async def test_url_probe_task_not_found( ) ) assert not await operator.meets_task_prerequisites() - url_id = await setup_manager.setup_url(URLStatus.OK) + url_id = await setup_manager.setup_url() await db_data_creator.create_validated_flags([url_id], validation_type=URLType.NOT_RELEVANT) assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert not await operator.meets_task_prerequisites() - await check_manager.check_url( - url_id=url_id, - expected_status=URLStatus.OK - ) + await check_manager.check_web_metadata( url_id=url_id, status_code=404, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py index ecaec084..86aa3438 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py @@ -28,15 +28,12 @@ async def test_url_probe_task_no_redirect_ok( ) ) assert not await operator.meets_task_prerequisites() - url_id = await setup_manager.setup_url(URLStatus.OK) + url_id = await setup_manager.setup_url() assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert not await operator.meets_task_prerequisites() - await check_manager.check_url( - url_id=url_id, - expected_status=URLStatus.OK - ) + await 
check_manager.check_web_metadata( url_id=url_id, status_code=200, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py index c3b0c6c4..6632277f 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py @@ -31,8 +31,8 @@ async def test_two_urls( ] ) assert not await operator.meets_task_prerequisites() - url_id_1 = await setup_manager.setup_url(URLStatus.OK, url=url_1) - url_id_2 = await setup_manager.setup_url(URLStatus.OK, url=url_2) + url_id_1 = await setup_manager.setup_url(url_1) + url_id_2 = await setup_manager.setup_url(url_2) assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py index df695021..ef4fba57 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py @@ -28,13 +28,10 @@ async def test_url_probe_task_redirect_dest_new_ok( dest_error=None ) ) - source_url_id = await setup_manager.setup_url(URLStatus.OK) + source_url_id = await setup_manager.setup_url() run_info = await operator.run_task() assert_task_ran_without_error(run_info) - await check_manager.check_url( - url_id=source_url_id, - expected_status=URLStatus.OK - ) + await check_manager.check_web_metadata( url_id=source_url_id, status_code=301, @@ -43,10 +40,7 @@ async def test_url_probe_task_redirect_dest_new_ok( accessed=True ) dest_url_id = await check_manager.check_redirect(source_url_id) - await check_manager.check_url( - url_id=dest_url_id, - expected_status=URLStatus.OK - ) + await 
check_manager.check_web_metadata( url_id=dest_url_id, status_code=200, diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py index 7aeeb1f8..d1b73274 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py @@ -29,8 +29,8 @@ async def test_url_probe_task_redirect_dest_exists_in_db( dest_error=None ) ) - source_url_id = await setup_manager.setup_url(URLStatus.OK) - dest_url_id = await setup_manager.setup_url(URLStatus.OK, url=TEST_DEST_URL.replace("https://", "")) + source_url_id = await setup_manager.setup_url() + dest_url_id = await setup_manager.setup_url(TEST_DEST_URL.replace("https://", "")) # Add web metadata for destination URL, to prevent it from being pulled web_metadata = URLWebMetadataPydantic( url_id=dest_url_id, @@ -42,14 +42,6 @@ async def test_url_probe_task_redirect_dest_exists_in_db( await setup_manager.adb_client.bulk_insert([web_metadata]) run_info = await operator.run_task() assert_task_ran_without_error(run_info) - await check_manager.check_url( - url_id=source_url_id, - expected_status=URLStatus.OK - ) - await check_manager.check_url( - url_id=dest_url_id, - expected_status=URLStatus.OK - ) await check_manager.check_web_metadata( url_id=source_url_id, status_code=302, diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py index a8cb51f7..cc6ef650 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py @@ -31,7 +31,7 @@ async def test_url_probe_task_functional_equivalent( redirect_url=FullURL(TEST_URL + "/") ) ) - url_id 
= await setup_manager.setup_url(URLStatus.OK) + url_id = await setup_manager.setup_url() await run_task_and_confirm_success(operator) urls: list[URL] = await setup_manager.adb_client.get_all(URL) diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py index 1dcd98d9..c8654b85 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py @@ -35,18 +35,11 @@ async def test_url_probe_task_redirect_two_urls_same_dest( ), ] ) - source_url_id_1 = await setup_manager.setup_url(URLStatus.OK) - source_url_id_2 = await setup_manager.setup_url(URLStatus.OK, url="example.com/2") + source_url_id_1 = await setup_manager.setup_url() + source_url_id_2 = await setup_manager.setup_url("example.com/2") run_info = await operator.run_task() assert_task_ran_without_error(run_info) - await check_manager.check_url( - url_id=source_url_id_1, - expected_status=URLStatus.OK - ) - await check_manager.check_url( - url_id=source_url_id_2, - expected_status=URLStatus.OK - ) + redirect_url_id_1 = await check_manager.check_redirect( source_url_id=source_url_id_1 ) diff --git a/tests/automated/integration/tasks/url/impl/probe/setup/manager.py b/tests/automated/integration/tasks/url/impl/probe/setup/manager.py index 44b5bd54..4b3d16c2 100644 --- a/tests/automated/integration/tasks/url/impl/probe/setup/manager.py +++ b/tests/automated/integration/tasks/url/impl/probe/setup/manager.py @@ -23,12 +23,10 @@ def __init__( async def setup_url( self, - url_status: URLStatus, url: str = TEST_URL ) -> int: url_insert_model = URLInsertModel( url=url, - status=url_status, source=TEST_SOURCE, trailing_slash=False ) diff --git a/tests/helpers/data_creator/commands/impl/urls_/query.py b/tests/helpers/data_creator/commands/impl/urls_/query.py index 
c4fddad4..c56a88ef 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/query.py +++ b/tests/helpers/data_creator/commands/impl/urls_/query.py @@ -40,7 +40,6 @@ def run_sync(self) -> InsertURLsInfo: url_infos.append( URLInfo( url=url, - status=convert_url_creation_enum_to_url_status(self.status), name="Test Name" if self.status in ( URLCreationEnum.VALIDATED, URLCreationEnum.SUBMITTED, diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index d3f6c924..1b4d6cb7 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -439,7 +439,6 @@ async def create_submitted_urls( async def create_urls( self, - status: URLStatus = URLStatus.OK, source: URLSource = URLSource.COLLECTOR, record_type: RecordType | None = RecordType.RESOURCES, collector_metadata: dict | None = None, @@ -449,7 +448,6 @@ async def create_urls( url_mappings: list[SimpleURLMapping] = await create_urls( adb_client=self.adb_client, - status=status, source=source, record_type=record_type, collector_metadata=collector_metadata, diff --git a/tests/helpers/data_creator/create.py b/tests/helpers/data_creator/create.py index 57c9f9da..73ad8c63 100644 --- a/tests/helpers/data_creator/create.py +++ b/tests/helpers/data_creator/create.py @@ -32,14 +32,12 @@ async def create_batch( async def create_urls( adb_client: AsyncDatabaseClient, - status: URLStatus = URLStatus.OK, source: URLSource = URLSource.COLLECTOR, record_type: RecordType | None = RecordType.RESOURCES, collector_metadata: dict | None = None, count: int = 1 ) -> list[SimpleURLMapping]: urls: list[URLInsertModel] = generate_urls( - status=status, source=source, collector_metadata=collector_metadata, count=count, diff --git a/tests/helpers/data_creator/generate.py b/tests/helpers/data_creator/generate.py index b447888d..6c5f8071 100644 --- a/tests/helpers/data_creator/generate.py +++ b/tests/helpers/data_creator/generate.py @@ -39,7 +39,6 @@ def generate_batch_url_links( ] def 
generate_urls( - status: URLStatus = URLStatus.OK, source: URLSource = URLSource.COLLECTOR, collector_metadata: dict | None = None, count: int = 1 @@ -50,7 +49,6 @@ def generate_urls( results.append(URLInsertModel( url=f"example.com/{val}", scheme="https", - status=status, source=source, name=f"Example {val}", collector_metadata=collector_metadata, From 650580aef83c38c598b0606f5504deae81b08235 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Wed, 31 Dec 2025 19:17:01 -0500 Subject: [PATCH 22/24] Remove URLStatus --- .../queries/get_annotation_batch_info.py | 1 - .../annotate/_shared/queries/helper.py | 1 - src/api/endpoints/collector/manual/query.py | 2 +- .../metrics/batches/aggregated/query/core.py | 20 +++---------------- .../aggregated/query/url_error/query.py | 1 - .../metrics/batches/breakdown/query.py | 6 ++---- .../metrics/urls/breakdown/query/core.py | 5 ++--- src/api/endpoints/review/reject/query.py | 1 - .../submit/data_source/queries/core.py | 3 --- src/api/endpoints/submit/url/queries/core.py | 3 +-- src/api/endpoints/task/by_id/query.py | 3 +-- src/api/endpoints/url/get/dto.py | 2 -- src/collectors/enums.py | 6 ------ .../impl/huggingface/queries/get/mappings.py | 1 - .../huggingface/queries/prereq/requester.py | 8 -------- .../operators/auto_relevant/queries/cte.py | 3 +-- src/db/models/impl/url/core/pydantic/info.py | 3 --- .../models/impl/url/core/pydantic/insert.py | 2 -- src/db/models/impl/url/core/sqlalchemy.py | 10 ++++------ .../core/common/annotation_exists_/core.py | 3 +-- .../url_counts/cte/error.py | 1 - .../core/metrics/urls/aggregated/pending.py | 4 ++-- src/db/statement_composer.py | 14 +------------ tests/alembic/helpers.py | 2 -- .../api/_helpers/RequestValidator.py | 2 +- .../api/annotate/all/test_anon_count.py | 3 ++- .../api/annotate/all/test_happy_path.py | 2 +- .../api/annotate/all/test_sorting.py | 1 - .../api/annotate/anonymous/test_core.py | 2 +- .../api/metrics/batches/test_aggregated.py | 2 +- 
.../api/metrics/batches/test_breakdown.py | 2 +- .../integration/api/metrics/test_backlog.py | 1 - .../api/metrics/urls/aggregated/test_core.py | 2 +- .../metrics/urls/breakdown/test_submitted.py | 2 +- .../api/proposals/test_agencies.py | 2 +- .../api/submit/data_source/test_core.py | 1 - .../api/submit/data_source/test_duplicate.py | 4 ++-- .../api/submit/test_url_maximal.py | 2 +- .../integration/api/test_manual_batch.py | 8 ++++---- .../api/url/by_id/delete/test_any_url.py | 10 +++++----- .../api/url/by_id/snapshot/test_not_found.py | 3 ++- tests/automated/integration/conftest.py | 12 ----------- .../core/async_/conclude_task/test_error.py | 1 - .../core/async_/conclude_task/test_success.py | 1 - .../core/async_/run_task/test_break_loop.py | 2 +- .../core/async_/run_task/test_prereq_met.py | 2 -- .../annotate_url/test_agency_not_in_db.py | 2 +- .../db/client/approve_url/test_basic.py | 3 +-- .../db/client/approve_url/test_error.py | 3 +-- ...next_url_for_annotation_batch_filtering.py | 2 +- .../db/structure/test_updated_at.py | 2 -- .../api/data_sources/by_id/test_get.py | 1 + .../integration/readonly/setup/annotations.py | 2 +- .../integration/readonly/setup/data_source.py | 1 - .../integration/readonly/setup/meta_url.py | 1 - .../impl/huggingface/setup/queries/setup.py | 1 - .../test_no_html_content_not_picked_up.py | 1 - .../test_not_relevant_picked_up.py | 5 ++--- .../huggingface/test_validated_picked_up.py | 5 ++--- .../probe/test_entry_not_found.py | 2 +- .../probe/test_happy_path.py | 2 +- .../impl/sync_to_ds/data_source/test_add.py | 1 - .../test_update_optional_ds_metadata.py | 2 -- .../survey/test_survey_flag.py | 1 + .../tasks/url/impl/html/mocks/methods.py | 2 -- .../tasks/url/impl/html/test_200.py | 1 - .../tasks/url/impl/html/test_404.py | 1 + .../tasks/url/impl/html/test_error.py | 1 + .../url/impl/html/test_no_web_metadata.py | 1 - .../tasks/url/impl/probe/check/manager.py | 2 +- .../tasks/url/impl/probe/models/__init__.py | 0 
.../tasks/url/impl/probe/models/entry.py | 10 ---------- .../url/impl/probe/no_redirect/test_error.py | 3 --- .../impl/probe/no_redirect/test_not_found.py | 2 -- .../url/impl/probe/no_redirect/test_ok.py | 1 - .../impl/probe/no_redirect/test_two_urls.py | 1 - .../probe/redirect/dest_new/test_dest_ok.py | 1 - .../probe/redirect/test_dest_exists_in_db.py | 1 - .../redirect/test_functional_equivalent.py | 1 - .../probe/redirect/test_two_urls_same_dest.py | 1 - .../tasks/url/impl/probe/setup/manager.py | 1 - ...two_branches_one_root_in_db_not_flagged.py | 1 - .../tasks/url/impl/test_example_task.py | 3 ++- .../test_url_miscellaneous_metadata_task.py | 6 +++--- .../url/impl/test_url_record_type_task.py | 9 +++++---- .../url/impl/validate/test_data_source.py | 2 +- tests/automated/unit/core/test_core_logger.py | 2 +- .../security_manager/test_security_manager.py | 2 +- .../test_autogoogler_collector.py | 6 +++--- .../test_common_crawl_collector.py | 4 ++-- .../test_example_collector.py | 4 ++-- .../test_muckrock_collectors.py | 6 +++--- .../helpers/batch_creation_parameters/core.py | 1 - .../data_creator/commands/impl/html_data.py | 4 +--- .../impl/suggestion/agency_confirmed.py | 1 + .../impl/suggestion/auto/agency_/core.py | 1 + .../commands/impl/urls_/convert.py | 19 ------------------ .../data_creator/commands/impl/urls_/query.py | 5 ++--- .../data_creator/commands/impl/urls_/tdo.py | 2 -- .../commands/impl/urls_v2/core.py | 3 --- .../commands/impl/urls_v2/response.py | 1 - tests/helpers/data_creator/core.py | 4 ++-- tests/helpers/data_creator/create.py | 2 +- tests/helpers/data_creator/generate.py | 5 ++--- tests/helpers/setup/annotation/core.py | 1 - tests/helpers/setup/final_review/core.py | 2 -- tests/helpers/setup/wipe.py | 2 +- tests/manual/api/test_contributions.py | 2 +- .../lifecycle/test_auto_googler_lifecycle.py | 2 +- .../core/lifecycle/test_ckan_lifecycle.py | 7 ++++--- .../lifecycle/test_muckrock_lifecycles.py | 7 ++++--- 
.../scheduled/test_push_to_huggingface.py | 2 -- .../huggingface/inference/test_relevancy.py | 3 +-- .../external/internet_archive/test_search.py | 1 - .../test_deepseek_record_classifier.py | 2 +- .../test_openai_record_classifier.py | 2 +- .../test_autogoogler_collector.py | 5 +++-- .../source_collectors/test_ckan_collector.py | 3 +-- .../test_common_crawler_collector.py | 3 +-- .../test_muckrock_collectors.py | 7 ++++--- .../test_common_crawler_integration.py | 3 --- 121 files changed, 116 insertions(+), 270 deletions(-) delete mode 100644 tests/automated/integration/tasks/url/impl/probe/models/__init__.py delete mode 100644 tests/automated/integration/tasks/url/impl/probe/models/entry.py diff --git a/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py b/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py index 0154ca2f..b9fcc935 100644 --- a/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py +++ b/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py @@ -4,7 +4,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate.dtos.shared.batch import AnnotationBatchInfo -from src.collectors.enums import URLStatus from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/annotate/_shared/queries/helper.py b/src/api/endpoints/annotate/_shared/queries/helper.py index 9d7e2210..57370c36 100644 --- a/src/api/endpoints/annotate/_shared/queries/helper.py +++ b/src/api/endpoints/annotate/_shared/queries/helper.py @@ -5,7 +5,6 @@ from sqlalchemy import Select, case, CTE, ColumnElement from sqlalchemy.orm import joinedload -from src.collectors.enums import URLStatus from src.db.helpers.query import exists_url, not_exists_url from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended from 
src.db.models.impl.url.core.enums import URLSource diff --git a/src/api/endpoints/collector/manual/query.py b/src/api/endpoints/collector/manual/query.py index 31cd91ad..8216b10b 100644 --- a/src/api/endpoints/collector/manual/query.py +++ b/src/api/endpoints/collector/manual/query.py @@ -3,7 +3,7 @@ from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO from src.api.endpoints.collector.dtos.manual_batch.response import ManualBatchResponseDTO -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL diff --git a/src/api/endpoints/metrics/batches/aggregated/query/core.py b/src/api/endpoints/metrics/batches/aggregated/query/core.py index cc6259de..07015c1d 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/core.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/core.py @@ -1,29 +1,15 @@ -from sqlalchemy import case, select +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.sql.functions import coalesce, func +from sqlalchemy.sql.functions import func from src.api.endpoints.metrics.batches.aggregated.dto import GetMetricsBatchesAggregatedResponseDTO, \ GetMetricsBatchesAggregatedInnerResponseDTO -from src.api.endpoints.metrics.batches.aggregated.query.all_urls.query import CountAllURLsByBatchStrategyQueryBuilder -from src.api.endpoints.metrics.batches.aggregated.query.batch_status_.query import \ - BatchStatusByBatchStrategyQueryBuilder from src.api.endpoints.metrics.batches.aggregated.query.requester_.requester import \ GetBatchesAggregatedMetricsQueryRequester -from src.api.endpoints.metrics.batches.aggregated.query.submitted_.query import \ - CountSubmittedByBatchStrategyQueryBuilder -from src.api.endpoints.metrics.batches.aggregated.query.url_error.query import 
URLErrorByBatchStrategyQueryBuilder -from src.api.endpoints.metrics.batches.aggregated.query.validated_.query import \ - ValidatedURLCountByBatchStrategyQueryBuilder -from src.collectors.enums import URLStatus, CollectorType +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import StatementComposer class GetBatchesAggregatedMetricsQueryBuilder(QueryBuilderBase): diff --git a/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py b/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py index 23929c14..6712c76d 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py @@ -4,7 +4,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse -from src.collectors.enums import URLStatus from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh from src.db.models.impl.batch.sqlalchemy import Batch diff --git a/src/api/endpoints/metrics/batches/breakdown/query.py b/src/api/endpoints/metrics/batches/breakdown/query.py index 5847e309..d46a01b9 100644 --- a/src/api/endpoints/metrics/batches/breakdown/query.py +++ b/src/api/endpoints/metrics/batches/breakdown/query.py @@ -1,4 +1,4 @@ -from sqlalchemy import select, case, Column +from sqlalchemy import select, Column from 
sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.sql.functions import coalesce @@ -11,11 +11,9 @@ from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE from src.api.endpoints.metrics.batches.breakdown.total.cte_ import TOTAL_CTE from src.api.endpoints.metrics.batches.breakdown.validated.cte_ import VALIDATED_CTE -from src.collectors.enums import URLStatus, CollectorType +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase from src.db.statement_composer import StatementComposer diff --git a/src/api/endpoints/metrics/urls/breakdown/query/core.py b/src/api/endpoints/metrics/urls/breakdown/query/core.py index d2a1703f..df521497 100644 --- a/src/api/endpoints/metrics/urls/breakdown/query/core.py +++ b/src/api/endpoints/metrics/urls/breakdown/query/core.py @@ -3,12 +3,11 @@ from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseInnerDTO, \ GetMetricsURLsBreakdownPendingResponseDTO -from src.collectors.enums import URLStatus from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/review/reject/query.py b/src/api/endpoints/review/reject/query.py 
index 1f9dfe91..ed444bfb 100644 --- a/src/api/endpoints/review/reject/query.py +++ b/src/api/endpoints/review/reject/query.py @@ -4,7 +4,6 @@ from starlette.status import HTTP_400_BAD_REQUEST from src.api.endpoints.review.enums import RejectionReason -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL diff --git a/src/api/endpoints/submit/data_source/queries/core.py b/src/api/endpoints/submit/data_source/queries/core.py index f4329786..aec2e821 100644 --- a/src/api/endpoints/submit/data_source/queries/core.py +++ b/src/api/endpoints/submit/data_source/queries/core.py @@ -1,12 +1,9 @@ import uuid -from typing import Any -from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.submit.data_source.models.response.standard import SubmitDataSourceURLProposalResponse from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest -from src.collectors.enums import URLStatus from src.core.enums import BatchStatus from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon diff --git a/src/api/endpoints/submit/url/queries/core.py b/src/api/endpoints/submit/url/queries/core.py index 8e257072..49e56a98 100644 --- a/src/api/endpoints/submit/url/queries/core.py +++ b/src/api/endpoints/submit/url/queries/core.py @@ -7,16 +7,15 @@ from src.api.endpoints.submit.url.queries.convert import convert_invalid_url_to_url_response, \ convert_duplicate_urls_to_url_response from src.api.endpoints.submit.url.queries.dedupe import DeduplicateURLQueryBuilder -from src.collectors.enums import URLStatus from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from 
src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.queries.base.builder import QueryBuilderBase from src.util.models.url_and_scheme import URLAndScheme from src.util.url import clean_url, get_url_and_scheme, is_valid_url diff --git a/src/api/endpoints/task/by_id/query.py b/src/api/endpoints/task/by_id/query.py index 6aa55fd0..f1ea5adb 100644 --- a/src/api/endpoints/task/by_id/query.py +++ b/src/api/endpoints/task/by_id/query.py @@ -1,9 +1,8 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload, joinedload +from sqlalchemy.orm import selectinload from src.api.endpoints.task.by_id.dto import TaskInfo -from src.collectors.enums import URLStatus from src.db.enums import TaskType from src.db.models.impl.task.core import Task from src.db.models.impl.task.enums import TaskStatus diff --git a/src/api/endpoints/url/get/dto.py b/src/api/endpoints/url/get/dto.py index 6eee6e51..0e10c6e9 100644 --- a/src/api/endpoints/url/get/dto.py +++ b/src/api/endpoints/url/get/dto.py @@ -1,9 +1,7 @@ import datetime -from typing import Optional from pydantic import BaseModel -from src.collectors.enums import URLStatus from src.db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource, TaskType 
from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum diff --git a/src/collectors/enums.py b/src/collectors/enums.py index 16711a0c..2e5f6239 100644 --- a/src/collectors/enums.py +++ b/src/collectors/enums.py @@ -9,9 +9,3 @@ class CollectorType(Enum): MUCKROCK_ALL_SEARCH = "muckrock_all_search" CKAN = "ckan" MANUAL = "manual" - -class URLStatus(Enum): - OK = "ok" - ERROR = "error" - DUPLICATE = "duplicate" - BROKEN = "broken" diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py index 0621ee52..ebef8b45 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py @@ -1,4 +1,3 @@ -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/prereq/requester.py b/src/core/tasks/scheduled/impl/huggingface/queries/prereq/requester.py index 1eaa306d..3abadbf5 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/prereq/requester.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/prereq/requester.py @@ -1,19 +1,11 @@ from datetime import datetime -from operator import or_ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.sql.functions import count -from src.collectors.enums import URLStatus from src.core.tasks.scheduled.impl.huggingface.queries.cte import HuggingfacePrereqCTEContainer -from src.db.enums import TaskType -from src.db.helpers.query import not_exists_url, no_url_task_error, exists_url from src.db.helpers.session import session_helper as sh -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.state.huggingface import HuggingFaceUploadState -from 
src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.core.sqlalchemy import URL class CheckValidURLsUpdatedRequester: diff --git a/src/core/tasks/url/operators/auto_relevant/queries/cte.py b/src/core/tasks/url/operators/auto_relevant/queries/cte.py index ab90db75..a4e14b2d 100644 --- a/src/core/tasks/url/operators/auto_relevant/queries/cte.py +++ b/src/core/tasks/url/operators/auto_relevant/queries/cte.py @@ -1,12 +1,11 @@ from sqlalchemy import select, CTE from sqlalchemy.orm import aliased -from src.collectors.enums import URLStatus from src.db.enums import TaskType from src.db.helpers.query import not_exists_url, no_url_task_error +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType class AutoRelevantPrerequisitesCTEContainer: diff --git a/src/db/models/impl/url/core/pydantic/info.py b/src/db/models/impl/url/core/pydantic/info.py index 0985b3fc..74082427 100644 --- a/src/db/models/impl/url/core/pydantic/info.py +++ b/src/db/models/impl/url/core/pydantic/info.py @@ -1,9 +1,7 @@ import datetime -from typing import Optional from pydantic import BaseModel -from src.collectors.enums import URLStatus from src.db.models.impl.url.core.enums import URLSource @@ -12,7 +10,6 @@ class URLInfo(BaseModel): batch_id: int | None= None url: str collector_metadata: dict | None = None - status: URLStatus = URLStatus.OK updated_at: datetime.datetime | None = None created_at: datetime.datetime | None = None name: str | None = None diff --git a/src/db/models/impl/url/core/pydantic/insert.py b/src/db/models/impl/url/core/pydantic/insert.py index 33842d53..643cab15 100644 --- a/src/db/models/impl/url/core/pydantic/insert.py +++ b/src/db/models/impl/url/core/pydantic/insert.py @@ 
-1,5 +1,3 @@ -from src.collectors.enums import URLStatus -from src.core.enums import RecordType from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.templates_.base import Base diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index 35178505..b9eedc5c 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -2,7 +2,6 @@ from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.orm import relationship, Mapped -from src.collectors.enums import URLStatus from src.db.models.helpers import enum_column from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask @@ -10,19 +9,18 @@ from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser -from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import 
LinkUserSuggestionLocationNotFound from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser -from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.mixins import UpdatedAtMixin, CreatedAtMixin from src.db.models.templates_.with_id import WithIDBase diff --git a/src/db/queries/implementations/core/common/annotation_exists_/core.py b/src/db/queries/implementations/core/common/annotation_exists_/core.py index 072c04af..4c7328a2 100644 --- a/src/db/queries/implementations/core/common/annotation_exists_/core.py +++ b/src/db/queries/implementations/core/common/annotation_exists_/core.py @@ -16,12 +16,11 @@ from sqlalchemy import case, func, Select, select -from src.collectors.enums import URLStatus -from src.db.queries.implementations.core.common.annotation_exists_.constants import ALL_ANNOTATION_MODELS from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase +from src.db.queries.implementations.core.common.annotation_exists_.constants import ALL_ANNOTATION_MODELS class AnnotationExistsCTEQueryBuilder(QueryBuilderBase): diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py 
b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py index 953a5c0d..2109588b 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py @@ -1,6 +1,5 @@ from sqlalchemy import select, func -from src.collectors.enums import URLStatus from src.db.helpers.query import exists_url from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL diff --git a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py index 30aba066..e95726bf 100644 --- a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py +++ b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py @@ -4,15 +4,15 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.metrics.dtos.get.urls.aggregated.pending import GetMetricsURLsAggregatedPendingResponseDTO -from src.collectors.enums import URLStatus from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser -from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser +from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.common.annotation_exists_.core import AnnotationExistsCTEQueryBuilder + class PendingAnnotationExistsCTEQueryBuilder(AnnotationExistsCTEQueryBuilder): @property diff --git a/src/db/statement_composer.py b/src/db/statement_composer.py index 6834ee1e..d3e90b8b 100644 --- a/src/db/statement_composer.py +++ b/src/db/statement_composer.py 
@@ -1,21 +1,9 @@ -from http import HTTPStatus -from typing import Any +from sqlalchemy import Select, select, exists, func, Subquery, not_, ColumnElement -from sqlalchemy import Select, select, exists, func, Subquery, and_, not_, ColumnElement, Exists -from sqlalchemy.orm import selectinload - -from src.collectors.enums import URLStatus -from src.db.enums import TaskType from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.link.task_url import LinkTaskURL -from src.db.models.impl.task.core import Task -from src.db.models.impl.task.enums import TaskStatus from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.types import UserSuggestionType diff --git a/tests/alembic/helpers.py b/tests/alembic/helpers.py index a284e0fc..0e19d035 100644 --- a/tests/alembic/helpers.py +++ b/tests/alembic/helpers.py @@ -1,5 +1,3 @@ -from typing import Optional - from sqlalchemy import text from sqlalchemy.orm import Session diff --git a/tests/automated/integration/api/_helpers/RequestValidator.py b/tests/automated/integration/api/_helpers/RequestValidator.py index 851e75fb..b1bfbf20 100644 --- a/tests/automated/integration/api/_helpers/RequestValidator.py +++ b/tests/automated/integration/api/_helpers/RequestValidator.py @@ -10,7 +10,6 @@ from src.api.endpoints.batch.dtos.get.logs import GetBatchLogsResponse from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary -from src.api.shared.models.message_response import MessageResponse from 
src.api.endpoints.batch.duplicates.dto import GetDuplicatesByBatchResponse from src.api.endpoints.batch.urls.dto import GetURLsByBatchResponse from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO @@ -32,6 +31,7 @@ from src.api.endpoints.task.dtos.get.task_status import GetTaskStatusResponseInfo from src.api.endpoints.task.dtos.get.tasks import GetTasksResponse from src.api.endpoints.url.get.dto import GetURLsResponseInfo +from src.api.shared.models.message_response import MessageResponse from src.collectors.enums import CollectorType from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.core.enums import BatchStatus diff --git a/tests/automated/integration/api/annotate/all/test_anon_count.py b/tests/automated/integration/api/annotate/all/test_anon_count.py index 16fe728b..05975236 100644 --- a/tests/automated/integration/api/annotate/all/test_anon_count.py +++ b/tests/automated/integration/api/annotate/all/test_anon_count.py @@ -1,6 +1,7 @@ -import pytest import uuid +import pytest + from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.core.enums import RecordType from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index a356fa56..8a62c3e8 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -11,10 +11,10 @@ from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.annotation.name.user.sqlalchemy 
import AnnotationNameUserEndorsement from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review diff --git a/tests/automated/integration/api/annotate/all/test_sorting.py b/tests/automated/integration/api/annotate/all/test_sorting.py index 1a81dc89..2f9f7b2a 100644 --- a/tests/automated/integration/api/annotate/all/test_sorting.py +++ b/tests/automated/integration/api/annotate/all/test_sorting.py @@ -1,7 +1,6 @@ import pytest from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch from src.db.models.impl.url.core.enums import URLSource diff --git a/tests/automated/integration/api/annotate/anonymous/test_core.py b/tests/automated/integration/api/annotate/anonymous/test_core.py index e977accb..65f18965 100644 --- a/tests/automated/integration/api/annotate/anonymous/test_core.py +++ b/tests/automated/integration/api/annotate/anonymous/test_core.py @@ -15,6 +15,7 @@ from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.annotation.name.user.sqlalchemy import 
AnnotationNameUserEndorsement from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon @@ -22,7 +23,6 @@ from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.models.mixins import URLDependentMixin from tests.automated.integration.api.annotate.anonymous.helper import get_next_url_for_anonymous_annotation, \ post_and_get_next_url_for_anonymous_annotation diff --git a/tests/automated/integration/api/metrics/batches/test_aggregated.py b/tests/automated/integration/api/metrics/batches/test_aggregated.py index 6142a345..00936d15 100644 --- a/tests/automated/integration/api/metrics/batches/test_aggregated.py +++ b/tests/automated/integration/api/metrics/batches/test_aggregated.py @@ -1,6 +1,6 @@ import pytest -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping_.simple import SimpleURLMapping diff --git a/tests/automated/integration/api/metrics/batches/test_breakdown.py b/tests/automated/integration/api/metrics/batches/test_breakdown.py index 6921c3c1..71b7c96b 100644 --- a/tests/automated/integration/api/metrics/batches/test_breakdown.py +++ b/tests/automated/integration/api/metrics/batches/test_breakdown.py @@ -2,7 +2,7 @@ import pytest -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping_.simple import SimpleURLMapping diff --git a/tests/automated/integration/api/metrics/test_backlog.py 
b/tests/automated/integration/api/metrics/test_backlog.py index 181c295e..a6de442e 100644 --- a/tests/automated/integration/api/metrics/test_backlog.py +++ b/tests/automated/integration/api/metrics/test_backlog.py @@ -1,7 +1,6 @@ import pendulum import pytest -from src.collectors.enums import URLStatus from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py index 224e7d33..5dc163c7 100644 --- a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py +++ b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py @@ -2,7 +2,7 @@ import pytest -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters diff --git a/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py b/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py index d0a25ab1..a9a52d2e 100644 --- a/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py +++ b/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py @@ -1,7 +1,7 @@ import pendulum import pytest -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters diff --git 
a/tests/automated/integration/api/proposals/test_agencies.py b/tests/automated/integration/api/proposals/test_agencies.py index d1a2d2ab..354481f1 100644 --- a/tests/automated/integration/api/proposals/test_agencies.py +++ b/tests/automated/integration/api/proposals/test_agencies.py @@ -3,9 +3,9 @@ from src.api.endpoints.proposals.agencies.by_id.approve.response import ProposalAgencyApproveResponse from src.api.endpoints.proposals.agencies.by_id.locations.get.response import ProposalAgencyGetLocationsOuterResponse from src.api.endpoints.proposals.agencies.by_id.put.request import ProposalAgencyPutRequest -from src.api.endpoints.proposals.agencies.root.get.response import ProposalAgencyGetOuterResponse from src.api.endpoints.proposals.agencies.by_id.reject.request import ProposalAgencyRejectRequestModel from src.api.endpoints.proposals.agencies.by_id.reject.response import ProposalAgencyRejectResponse +from src.api.endpoints.proposals.agencies.root.get.response import ProposalAgencyGetOuterResponse from src.api.endpoints.submit.agency.enums import AgencyProposalRequestStatus from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel from src.api.endpoints.submit.agency.response import SubmitAgencyProposalResponse diff --git a/tests/automated/integration/api/submit/data_source/test_core.py b/tests/automated/integration/api/submit/data_source/test_core.py index 120abd29..bf339bfd 100644 --- a/tests/automated/integration/api/submit/data_source/test_core.py +++ b/tests/automated/integration/api/submit/data_source/test_core.py @@ -4,7 +4,6 @@ import pytest from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest -from src.collectors.enums import URLStatus from src.core.enums import RecordType, BatchStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon diff --git a/tests/automated/integration/api/submit/data_source/test_duplicate.py 
b/tests/automated/integration/api/submit/data_source/test_duplicate.py index 37fb9703..87dd21a7 100644 --- a/tests/automated/integration/api/submit/data_source/test_duplicate.py +++ b/tests/automated/integration/api/submit/data_source/test_duplicate.py @@ -1,9 +1,9 @@ import pytest from fastapi import HTTPException -from src.api.endpoints.submit.data_source.models.response.duplicate import SubmitDataSourceURLDuplicateSubmissionResponse +from src.api.endpoints.submit.data_source.models.response.duplicate import \ + SubmitDataSourceURLDuplicateSubmissionResponse from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.flag.url_validated.enums import URLType diff --git a/tests/automated/integration/api/submit/test_url_maximal.py b/tests/automated/integration/api/submit/test_url_maximal.py index 1d458c98..5e9f0ec4 100644 --- a/tests/automated/integration/api/submit/test_url_maximal.py +++ b/tests/automated/integration/api/submit/test_url_maximal.py @@ -10,9 +10,9 @@ from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo 
diff --git a/tests/automated/integration/api/test_manual_batch.py b/tests/automated/integration/api/test_manual_batch.py index fa3f7884..ad8bfe3f 100644 --- a/tests/automated/integration/api/test_manual_batch.py +++ b/tests/automated/integration/api/test_manual_batch.py @@ -2,12 +2,12 @@ import pytest from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInnerInputDTO, ManualBatchInputDTO -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.batch.sqlalchemy import Batch from src.collectors.enums import CollectorType from src.core.enums import RecordType +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata @pytest.mark.asyncio diff --git a/tests/automated/integration/api/url/by_id/delete/test_any_url.py b/tests/automated/integration/api/url/by_id/delete/test_any_url.py index 2711c103..d61f1553 100644 --- a/tests/automated/integration/api/url/by_id/delete/test_any_url.py +++ b/tests/automated/integration/api/url/by_id/delete/test_any_url.py @@ -19,8 +19,13 @@ from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from 
src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.change_log import ChangeLog from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL @@ -29,7 +34,6 @@ from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL -from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL @@ -40,10 +44,6 @@ from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.internet_archives.save.sqlalchemy import URLInternetArchivesSaveMetadata from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot -from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType -from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser -from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType -from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from 
src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder diff --git a/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py b/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py index cce84649..155b56d7 100644 --- a/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py +++ b/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py @@ -1,7 +1,8 @@ import pytest +from fastapi import Response from tests.helpers.api_test_helper import APITestHelper -from fastapi import Response + @pytest.mark.asyncio async def test_get_url_screenshot_not_found(api_test_helper: APITestHelper): diff --git a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index 1f7836ae..8a9a8569 100644 --- a/tests/automated/integration/conftest.py +++ b/tests/automated/integration/conftest.py @@ -6,7 +6,6 @@ from starlette.testclient import TestClient from src.api.main import app -from src.collectors.enums import URLStatus from src.collectors.manager import AsyncCollectorManager from src.core.core import AsyncCore from src.core.enums import RecordType @@ -248,17 +247,6 @@ async def test_url_id( ) return await db_data_creator.adb_client.add(url, return_id=True) -@pytest_asyncio.fixture -async def test_url_id_2( - db_data_creator: DBDataCreator, -) -> int: - url = URL( - url="example.com/2", - source=URLSource.COLLECTOR, - trailing_slash=False, - status=URLStatus.OK - ) - return await db_data_creator.adb_client.add(url, return_id=True) @pytest_asyncio.fixture diff --git a/tests/automated/integration/core/async_/conclude_task/test_error.py b/tests/automated/integration/core/async_/conclude_task/test_error.py index 1a31b87e..a747aa3a 100644 --- a/tests/automated/integration/core/async_/conclude_task/test_error.py +++ b/tests/automated/integration/core/async_/conclude_task/test_error.py @@ -1,6 +1,5 @@ import 
pytest -from src.core.enums import BatchStatus from src.core.tasks.url.enums import TaskOperatorOutcome from src.db.models.impl.task.enums import TaskStatus from tests.automated.integration.core.async_.conclude_task.helpers import setup_run_info diff --git a/tests/automated/integration/core/async_/conclude_task/test_success.py b/tests/automated/integration/core/async_/conclude_task/test_success.py index 03cc5b52..eb0e8988 100644 --- a/tests/automated/integration/core/async_/conclude_task/test_success.py +++ b/tests/automated/integration/core/async_/conclude_task/test_success.py @@ -1,6 +1,5 @@ import pytest -from src.core.enums import BatchStatus from src.core.tasks.url.enums import TaskOperatorOutcome from src.db.models.impl.task.enums import TaskStatus from tests.automated.integration.core.async_.conclude_task.helpers import setup_run_info diff --git a/tests/automated/integration/core/async_/run_task/test_break_loop.py b/tests/automated/integration/core/async_/run_task/test_break_loop.py index 71b5704f..0235bc08 100644 --- a/tests/automated/integration/core/async_/run_task/test_break_loop.py +++ b/tests/automated/integration/core/async_/run_task/test_break_loop.py @@ -4,10 +4,10 @@ import pytest from src.core.tasks.base.run_info import TaskOperatorRunInfo +from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.models.entry import URLTaskEntry from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.enums import TaskType -from src.core.tasks.url.enums import TaskOperatorOutcome from tests.automated.integration.core.async_.helpers import setup_async_core from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/core/async_/run_task/test_prereq_met.py b/tests/automated/integration/core/async_/run_task/test_prereq_met.py index e5425fd9..8d68034f 100644 --- a/tests/automated/integration/core/async_/run_task/test_prereq_met.py +++ 
b/tests/automated/integration/core/async_/run_task/test_prereq_met.py @@ -3,13 +3,11 @@ import pytest -from src.core.enums import BatchStatus from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.models.entry import URLTaskEntry from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.enums import TaskType -from src.db.models.impl.task.core import Task from tests.automated.integration.core.async_.helpers import setup_async_core from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/db/client/annotate_url/test_agency_not_in_db.py b/tests/automated/integration/db/client/annotate_url/test_agency_not_in_db.py index c419fb70..a91873a7 100644 --- a/tests/automated/integration/db/client/annotate_url/test_agency_not_in_db.py +++ b/tests/automated/integration/db/client/annotate_url/test_agency_not_in_db.py @@ -2,8 +2,8 @@ from src.db.constants import PLACEHOLDER_AGENCY_NAME from src.db.models.impl.agency.sqlalchemy import Agency -from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency @pytest.mark.asyncio diff --git a/tests/automated/integration/db/client/approve_url/test_basic.py b/tests/automated/integration/db/client/approve_url/test_basic.py index 734ff9b5..76150283 100644 --- a/tests/automated/integration/db/client/approve_url/test_basic.py +++ b/tests/automated/integration/db/client/approve_url/test_basic.py @@ -1,7 +1,6 @@ import pytest from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency @@ -9,8 +8,8 @@ from 
src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.models.impl.url.reviewing_user import ReviewingUserURL -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @pytest.mark.asyncio diff --git a/tests/automated/integration/db/client/approve_url/test_error.py b/tests/automated/integration/db/client/approve_url/test_error.py index f358a74b..c8e33547 100644 --- a/tests/automated/integration/db/client/approve_url/test_error.py +++ b/tests/automated/integration/db/client/approve_url/test_error.py @@ -2,9 +2,8 @@ from starlette.exceptions import HTTPException from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo -from src.core.enums import RecordType -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @pytest.mark.asyncio diff --git a/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py b/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py index 86d4a3ee..c32441f3 100644 --- a/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py +++ b/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py @@ -2,8 +2,8 @@ from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.core.enums import SuggestionType -from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation from tests.helpers.data_creator.core import DBDataCreator +from 
tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation @pytest.mark.asyncio diff --git a/tests/automated/integration/db/structure/test_updated_at.py b/tests/automated/integration/db/structure/test_updated_at.py index 31d40dbd..d65c44c3 100644 --- a/tests/automated/integration/db/structure/test_updated_at.py +++ b/tests/automated/integration/db/structure/test_updated_at.py @@ -1,9 +1,7 @@ -import asyncio from datetime import datetime import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.url.core.pydantic.upsert import URLUpsertModel from src.db.models.impl.url.core.sqlalchemy import URL from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py b/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py index 16c30869..2abab495 100644 --- a/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py +++ b/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py @@ -3,6 +3,7 @@ from src.api.endpoints.data_source.get.response import DataSourceGetResponse from tests.automated.integration.readonly.helper import ReadOnlyTestHelper + @pytest.mark.asyncio async def test_get_by_id(readonly_helper: ReadOnlyTestHelper): raw_json: dict = readonly_helper.api_test_helper.request_validator.get_v3( diff --git a/tests/automated/integration/readonly/setup/annotations.py b/tests/automated/integration/readonly/setup/annotations.py index 9e701f62..6829e714 100644 --- a/tests/automated/integration/readonly/setup/annotations.py +++ b/tests/automated/integration/readonly/setup/annotations.py @@ -4,10 +4,10 @@ from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion -from 
src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser +from src.db.models.impl.flag.url_validated.enums import URLType async def add_full_data_sources_annotations( diff --git a/tests/automated/integration/readonly/setup/data_source.py b/tests/automated/integration/readonly/setup/data_source.py index 7c626d04..d5984c06 100644 --- a/tests/automated/integration/readonly/setup/data_source.py +++ b/tests/automated/integration/readonly/setup/data_source.py @@ -1,6 +1,5 @@ from datetime import date -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.flag.url_validated.enums import URLType diff --git a/tests/automated/integration/readonly/setup/meta_url.py b/tests/automated/integration/readonly/setup/meta_url.py index e1e32c7f..d5ea9da4 100644 --- a/tests/automated/integration/readonly/setup/meta_url.py +++ b/tests/automated/integration/readonly/setup/meta_url.py @@ -1,4 +1,3 @@ -from src.collectors.enums import URLStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.url.core.enums import URLSource diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py index a8a839d1..f8fb2351 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py @@ -1,6 +1,5 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.collectors.enums import URLStatus from 
src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_no_html_content_not_picked_up.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_no_html_content_not_picked_up.py index 25c4d09d..9c767f71 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_no_html_content_not_picked_up.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_no_html_content_not_picked_up.py @@ -3,7 +3,6 @@ from src.core.enums import RecordType from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator -from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse from src.db.client.async_ import AsyncDatabaseClient from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.check import check_not_called from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_not_relevant_picked_up.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_not_relevant_picked_up.py index b4abc0ee..d4c9d4c8 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_not_relevant_picked_up.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_not_relevant_picked_up.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator @@ -9,11 +8,11 @@ from src.db.client.async_ import AsyncDatabaseClient from 
tests.automated.integration.tasks.scheduled.impl.huggingface.setup.check import check_results_called from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.data import generate_expected_outputs +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ + PushToHuggingFaceTestSetupStatusEnum from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.helper import setup_urls from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.input import \ TestPushToHuggingFaceURLSetupEntryInput -from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ - PushToHuggingFaceTestSetupStatusEnum from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_validated_picked_up.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_validated_picked_up.py index 4ca89aa1..4ac74f4e 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_validated_picked_up.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_validated_picked_up.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator @@ -9,11 +8,11 @@ from src.db.client.async_ import AsyncDatabaseClient from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.check import check_results_called from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.data import generate_expected_outputs +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ + PushToHuggingFaceTestSetupStatusEnum from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.helper import setup_urls from 
tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.input import \ TestPushToHuggingFaceURLSetupEntryInput -from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ - PushToHuggingFaceTestSetupStatusEnum from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error diff --git a/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_entry_not_found.py b/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_entry_not_found.py index 8a2157ed..80e6c129 100644 --- a/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_entry_not_found.py +++ b/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_entry_not_found.py @@ -4,8 +4,8 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata -from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.scheduled.impl.internet_archives.probe.setup import add_urls +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_happy_path.py b/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_happy_path.py index 90131605..96174e6b 100644 --- a/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_happy_path.py +++ b/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_happy_path.py @@ -6,9 +6,9 @@ from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives from src.db.models.impl.url.internet_archives.probe.sqlalchemy import 
URLInternetArchivesProbeMetadata from src.external.internet_archives.models.capture import IACapture -from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.scheduled.impl.internet_archives.probe.constants import TEST_URL_1, TEST_URL_2 from tests.automated.integration.tasks.scheduled.impl.internet_archives.probe.setup import add_urls +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py index f9faf657..2e57e042 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py @@ -5,7 +5,6 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.external.pdap.client import PDAPClient -from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest, AddDataSourcesInnerRequest from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel, \ diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py index 94273019..6d52afc2 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py +++ 
b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py @@ -1,7 +1,5 @@ from datetime import date -from sqlalchemy import update - from src.api.shared.models.message_response import MessageResponse from src.core.enums import RecordType from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py b/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py index 74e31306..feeba3bd 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py @@ -5,6 +5,7 @@ from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from tests.helpers.data_creator.core import DBDataCreator + @pytest.mark.asyncio async def test_survey_flag( operator: AgencyIdentificationTaskOperator, diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/methods.py b/tests/automated/integration/tasks/url/impl/html/mocks/methods.py index 5e2533d0..0e0c5657 100644 --- a/tests/automated/integration/tasks/url/impl/html/mocks/methods.py +++ b/tests/automated/integration/tasks/url/impl/html/mocks/methods.py @@ -1,5 +1,3 @@ -from typing import Optional - from src.core.tasks.url.operators.html.scraper.parser.dtos.response_html import ResponseHTMLInfo diff --git a/tests/automated/integration/tasks/url/impl/html/test_200.py b/tests/automated/integration/tasks/url/impl/html/test_200.py index 886d4131..cdfbd2fe 100644 --- a/tests/automated/integration/tasks/url/impl/html/test_200.py +++ b/tests/automated/integration/tasks/url/impl/html/test_200.py @@ -12,7 +12,6 @@ from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error from tests.helpers.data_creator.core 
import DBDataCreator - MOCK_HTML_CONTENT = """ diff --git a/tests/automated/integration/tasks/url/impl/html/test_404.py b/tests/automated/integration/tasks/url/impl/html/test_404.py index 7057b70e..51589277 100644 --- a/tests/automated/integration/tasks/url/impl/html/test_404.py +++ b/tests/automated/integration/tasks/url/impl/html/test_404.py @@ -12,6 +12,7 @@ from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error from tests.helpers.data_creator.core import DBDataCreator + class _MockURLRequestInterface: async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: diff --git a/tests/automated/integration/tasks/url/impl/html/test_error.py b/tests/automated/integration/tasks/url/impl/html/test_error.py index b00667ed..1290460f 100644 --- a/tests/automated/integration/tasks/url/impl/html/test_error.py +++ b/tests/automated/integration/tasks/url/impl/html/test_error.py @@ -12,6 +12,7 @@ from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error from tests.helpers.data_creator.core import DBDataCreator + class _MockURLRequestInterface: async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: diff --git a/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py b/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py index 36149177..06442164 100644 --- a/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py +++ b/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py @@ -4,7 +4,6 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_task_ran_without_error -from tests.helpers.data_creator.core import DBDataCreator @pytest.mark.asyncio diff --git 
a/tests/automated/integration/tasks/url/impl/probe/check/manager.py b/tests/automated/integration/tasks/url/impl/probe/check/manager.py index 9dd7f13d..10505920 100644 --- a/tests/automated/integration/tasks/url/impl/probe/check/manager.py +++ b/tests/automated/integration/tasks/url/impl/probe/check/manager.py @@ -3,8 +3,8 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView class TestURLProbeCheckManager: diff --git a/tests/automated/integration/tasks/url/impl/probe/models/__init__.py b/tests/automated/integration/tasks/url/impl/probe/models/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/tasks/url/impl/probe/models/entry.py b/tests/automated/integration/tasks/url/impl/probe/models/entry.py deleted file mode 100644 index 810f40ea..00000000 --- a/tests/automated/integration/tasks/url/impl/probe/models/entry.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel - -from src.collectors.enums import URLStatus -from src.external.url_request.probe.models.wrapper import URLProbeResponseOuterWrapper - - -class TestURLProbeTaskEntry(BaseModel): - url: str - url_status: URLStatus - planned_response: URLProbeResponseOuterWrapper \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py index 17b80c50..787d0d33 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py +++ 
b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py @@ -1,9 +1,6 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py index d6d5e4d2..866e7533 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py @@ -1,8 +1,6 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py index 86aa3438..dca1349e 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from 
tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py index 6632277f..d628ea53 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.url.core.sqlalchemy import URL from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py index ef4fba57..f7f9cb6e 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py index d1b73274..92729102 100644 --- 
a/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.url.web_metadata.insert import URLWebMetadataPydantic from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py index cc6ef650..cbf59b20 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.util.models.full_url import FullURL diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py index c8654b85..e8216f17 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.util.models.full_url import FullURL from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager diff --git a/tests/automated/integration/tasks/url/impl/probe/setup/manager.py 
b/tests/automated/integration/tasks/url/impl/probe/setup/manager.py index 4b3d16c2..bf65e9f6 100644 --- a/tests/automated/integration/tasks/url/impl/probe/setup/manager.py +++ b/tests/automated/integration/tasks/url/impl/probe/setup/manager.py @@ -1,6 +1,5 @@ from typing import cast, Literal -from src.collectors.enums import URLStatus from src.core.tasks.url.operators.probe.core import URLProbeTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.core.pydantic.insert import URLInsertModel diff --git a/tests/automated/integration/tasks/url/impl/root_url/test_two_branches_one_root_in_db_not_flagged.py b/tests/automated/integration/tasks/url/impl/root_url/test_two_branches_one_root_in_db_not_flagged.py index 8a40a476..384966a8 100644 --- a/tests/automated/integration/tasks/url/impl/root_url/test_two_branches_one_root_in_db_not_flagged.py +++ b/tests/automated/integration/tasks/url/impl/root_url/test_two_branches_one_root_in_db_not_flagged.py @@ -1,7 +1,6 @@ import pytest from src.core.tasks.url.operators.root_url.core import URLRootURLTaskOperator -from src.db.models.impl.flag.root_url.pydantic import FlagRootURLPydantic from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL from src.db.models.impl.url.core.enums import URLSource diff --git a/tests/automated/integration/tasks/url/impl/test_example_task.py b/tests/automated/integration/tasks/url/impl/test_example_task.py index 00ec7c34..c54425f7 100644 --- a/tests/automated/integration/tasks/url/impl/test_example_task.py +++ b/tests/automated/integration/tasks/url/impl/test_example_task.py @@ -2,12 +2,13 @@ import pytest -from src.db.enums import TaskType from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.operators.base import URLTaskOperatorBase +from src.db.enums import TaskType from src.db.models.impl.link.task_url import LinkTaskURL from 
tests.helpers.data_creator.core import DBDataCreator + class ExampleTaskOperator( URLTaskOperatorBase, ): diff --git a/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py b/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py index bc3f240d..8a907fdc 100644 --- a/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py +++ b/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py @@ -2,11 +2,11 @@ import pytest -from src.core.tasks.url.operators.misc_metadata.core import URLMiscellaneousMetadataTaskOperator -from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.core.sqlalchemy import URL from src.collectors.enums import CollectorType from src.core.tasks.url.enums import TaskOperatorOutcome +from src.core.tasks.url.operators.misc_metadata.core import URLMiscellaneousMetadataTaskOperator +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py b/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py index d9f1de4f..84471a70 100644 --- a/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py +++ b/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py @@ -2,14 +2,15 @@ import pytest -from src.db.enums import TaskType -from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.core.enums import RecordType from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.operators.record_type.core import URLRecordTypeTaskOperator -from src.core.enums import RecordType +from 
src.core.tasks.url.operators.record_type.llm_api.record_classifier.deepseek import DeepSeekRecordClassifier +from src.db.enums import TaskType +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from tests.helpers.data_creator.core import DBDataCreator -from src.core.tasks.url.operators.record_type.llm_api.record_classifier.deepseek import DeepSeekRecordClassifier + @pytest.mark.asyncio async def test_url_record_type_task(db_data_creator: DBDataCreator): diff --git a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py index 95d636c2..b17f726e 100644 --- a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py +++ b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py @@ -14,10 +14,10 @@ from src.core.tasks.url.operators.validate.core import AutoValidateURLTaskOperator from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from tests.automated.integration.tasks.url.impl.validate.helper import TestValidateTaskHelper, DEFAULT_RECORD_TYPE from tests.helpers.run import run_task_and_confirm_success diff --git a/tests/automated/unit/core/test_core_logger.py b/tests/automated/unit/core/test_core_logger.py index 6c4f0375..01dae052 100644 --- a/tests/automated/unit/core/test_core_logger.py 
+++ b/tests/automated/unit/core/test_core_logger.py @@ -3,8 +3,8 @@ import pytest -from src.db.models.impl.log.pydantic.info import LogInfo from src.core.logger import AsyncCoreLogger +from src.db.models.impl.log.pydantic.info import LogInfo @pytest.mark.asyncio diff --git a/tests/automated/unit/security_manager/test_security_manager.py b/tests/automated/unit/security_manager/test_security_manager.py index 66399d7f..ae58ed6e 100644 --- a/tests/automated/unit/security_manager/test_security_manager.py +++ b/tests/automated/unit/security_manager/test_security_manager.py @@ -4,9 +4,9 @@ from fastapi import HTTPException from jwt import InvalidTokenError -from src.security.manager import SecurityManager, get_access_info from src.security.dtos.access_info import AccessInfo from src.security.enums import Permissions +from src.security.manager import SecurityManager, get_access_info SECRET_KEY = "test_secret_key" VALID_TOKEN = "valid_token" diff --git a/tests/automated/unit/source_collectors/test_autogoogler_collector.py b/tests/automated/unit/source_collectors/test_autogoogler_collector.py index cc191dc3..e4e617a1 100644 --- a/tests/automated/unit/source_collectors/test_autogoogler_collector.py +++ b/tests/automated/unit/source_collectors/test_autogoogler_collector.py @@ -2,11 +2,11 @@ import pytest -from src.collectors.impl.auto_googler.dtos.query_results import GoogleSearchQueryResultsInnerDTO +from src.collectors.impl.auto_googler.collector import AutoGooglerCollector from src.collectors.impl.auto_googler.dtos.input import AutoGooglerInputDTO -from src.db.client.async_ import AsyncDatabaseClient +from src.collectors.impl.auto_googler.dtos.query_results import GoogleSearchQueryResultsInnerDTO from src.core.logger import AsyncCoreLogger -from src.collectors.impl.auto_googler.collector import AutoGooglerCollector +from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info 
import URLInfo diff --git a/tests/automated/unit/source_collectors/test_common_crawl_collector.py b/tests/automated/unit/source_collectors/test_common_crawl_collector.py index 0a10680f..c76bad38 100644 --- a/tests/automated/unit/source_collectors/test_common_crawl_collector.py +++ b/tests/automated/unit/source_collectors/test_common_crawl_collector.py @@ -2,10 +2,10 @@ import pytest +from src.collectors.impl.common_crawler.collector import CommonCrawlerCollector from src.collectors.impl.common_crawler.input import CommonCrawlerInputDTO -from src.db.client.async_ import AsyncDatabaseClient from src.core.logger import AsyncCoreLogger -from src.collectors.impl.common_crawler.collector import CommonCrawlerCollector +from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo diff --git a/tests/automated/unit/source_collectors/test_example_collector.py b/tests/automated/unit/source_collectors/test_example_collector.py index 632a6293..c99217b0 100644 --- a/tests/automated/unit/source_collectors/test_example_collector.py +++ b/tests/automated/unit/source_collectors/test_example_collector.py @@ -1,9 +1,9 @@ from unittest.mock import AsyncMock -from src.db.client.sync import DatabaseClient -from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.collectors.impl.example.core import ExampleCollector +from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.core.logger import AsyncCoreLogger +from src.db.client.sync import DatabaseClient def test_example_collector(): diff --git a/tests/automated/unit/source_collectors/test_muckrock_collectors.py b/tests/automated/unit/source_collectors/test_muckrock_collectors.py index 6c845b8e..009e550a 100644 --- a/tests/automated/unit/source_collectors/test_muckrock_collectors.py +++ b/tests/automated/unit/source_collectors/test_muckrock_collectors.py @@ -4,12 +4,12 @@ import pytest from 
src.collectors.impl.muckrock.collectors.county.core import MuckrockCountyLevelSearchCollector -from src.collectors.impl.muckrock.collectors.simple.core import MuckrockSimpleSearchCollector -from src.db.client.async_ import AsyncDatabaseClient -from src.core.logger import AsyncCoreLogger from src.collectors.impl.muckrock.collectors.county.dto import MuckrockCountySearchCollectorInputDTO +from src.collectors.impl.muckrock.collectors.simple.core import MuckrockSimpleSearchCollector from src.collectors.impl.muckrock.collectors.simple.dto import MuckrockSimpleSearchCollectorInputDTO from src.collectors.impl.muckrock.fetch_requests.foia import FOIAFetchRequest +from src.core.logger import AsyncCoreLogger +from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo diff --git a/tests/helpers/batch_creation_parameters/core.py b/tests/helpers/batch_creation_parameters/core.py index 4562cbdf..3719dae0 100644 --- a/tests/helpers/batch_creation_parameters/core.py +++ b/tests/helpers/batch_creation_parameters/core.py @@ -1,5 +1,4 @@ import datetime -from typing import Optional from pydantic import BaseModel, model_validator diff --git a/tests/helpers/data_creator/commands/impl/html_data.py b/tests/helpers/data_creator/commands/impl/html_data.py index 38ecb4bd..dbfe39f1 100644 --- a/tests/helpers/data_creator/commands/impl/html_data.py +++ b/tests/helpers/data_creator/commands/impl/html_data.py @@ -1,11 +1,9 @@ -from src.db.dtos.url.html_content import URLHTMLContentInfo -from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.dtos.url.raw_html import RawHTMLInfo +from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent from src.db.models.impl.url.scrape_info.enums import ScrapeStatus from src.db.models.impl.url.scrape_info.pydantic import 
URLScrapeInfoInsertModel from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase -from tests.helpers.data_creator.models.clients import DBDataCreatorClientContainer class HTMLDataCreatorCommand(DBDataCreatorCommandBase): diff --git a/tests/helpers/data_creator/commands/impl/suggestion/agency_confirmed.py b/tests/helpers/data_creator/commands/impl/suggestion/agency_confirmed.py index e096d15e..0a293e71 100644 --- a/tests/helpers/data_creator/commands/impl/suggestion/agency_confirmed.py +++ b/tests/helpers/data_creator/commands/impl/suggestion/agency_confirmed.py @@ -7,6 +7,7 @@ from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.agency import AgencyCommand + @final class AgencyConfirmedSuggestionCommand(DBDataCreatorCommandBase): diff --git a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py index ab29a817..e714714d 100644 --- a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py +++ b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py @@ -10,6 +10,7 @@ from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.agency import AgencyCommand + @final class AgencyAutoSuggestionsCommand(DBDataCreatorCommandBase): diff --git a/tests/helpers/data_creator/commands/impl/urls_/convert.py b/tests/helpers/data_creator/commands/impl/urls_/convert.py index c1e2db31..a7d2bdd1 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/convert.py +++ b/tests/helpers/data_creator/commands/impl/urls_/convert.py @@ -1,25 +1,6 @@ -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.enums import URLCreationEnum - -def convert_url_creation_enum_to_url_status(url_creation_enum: URLCreationEnum) 
-> URLStatus: - match url_creation_enum: - case URLCreationEnum.OK: - return URLStatus.OK - case URLCreationEnum.SUBMITTED: - return URLStatus.OK - case URLCreationEnum.VALIDATED: - return URLStatus.OK - case URLCreationEnum.NOT_RELEVANT: - return URLStatus.OK - case URLCreationEnum.ERROR: - raise ValueError("Invalid URL Status") - case URLCreationEnum.DUPLICATE: - return URLStatus.DUPLICATE - case _: - raise ValueError(f"Unknown URLCreationEnum: {url_creation_enum}") - def convert_url_creation_enum_to_validated_type( url_creation_enum: URLCreationEnum ) -> URLType: diff --git a/tests/helpers/data_creator/commands/impl/urls_/query.py b/tests/helpers/data_creator/commands/impl/urls_/query.py index c56a88ef..fd40834d 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/query.py +++ b/tests/helpers/data_creator/commands/impl/urls_/query.py @@ -1,13 +1,12 @@ from datetime import datetime -from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource -from tests.helpers.data_creator.commands.impl.urls_.tdo import SubmittedURLInfo from src.db.dtos.url.insert import InsertURLsInfo from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase -from tests.helpers.data_creator.commands.impl.urls_.convert import convert_url_creation_enum_to_url_status +from tests.helpers.data_creator.commands.impl.urls_.tdo import SubmittedURLInfo from tests.helpers.simple_test_data_functions import generate_test_urls diff --git a/tests/helpers/data_creator/commands/impl/urls_/tdo.py b/tests/helpers/data_creator/commands/impl/urls_/tdo.py index a8991dcd..fdb5a1cc 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/tdo.py +++ b/tests/helpers/data_creator/commands/impl/urls_/tdo.py 
@@ -2,8 +2,6 @@ from pydantic import BaseModel -from src.core.enums import RecordType - class SubmittedURLInfo(BaseModel): url_id: int diff --git a/tests/helpers/data_creator/commands/impl/urls_v2/core.py b/tests/helpers/data_creator/commands/impl/urls_v2/core.py index f7042720..20edd618 100644 --- a/tests/helpers/data_creator/commands/impl/urls_v2/core.py +++ b/tests/helpers/data_creator/commands/impl/urls_v2/core.py @@ -1,16 +1,13 @@ from datetime import datetime from src.db.dtos.url.insert import InsertURLsInfo -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.annotate import AnnotateCommand from tests.helpers.data_creator.commands.impl.html_data import HTMLDataCreatorCommand -from tests.helpers.data_creator.commands.impl.urls_.convert import convert_url_creation_enum_to_validated_type from tests.helpers.data_creator.commands.impl.urls_.query import URLsDBDataCreatorCommand from tests.helpers.data_creator.commands.impl.urls_v2.response import URLsV2Response -from tests.helpers.data_creator.generate import generate_validated_flags from tests.helpers.data_creator.models.creation_info.url import URLCreationInfo diff --git a/tests/helpers/data_creator/commands/impl/urls_v2/response.py b/tests/helpers/data_creator/commands/impl/urls_v2/response.py index 74aa8e20..935785e2 100644 --- a/tests/helpers/data_creator/commands/impl/urls_v2/response.py +++ b/tests/helpers/data_creator/commands/impl/urls_v2/response.py @@ -1,6 +1,5 @@ from pydantic import BaseModel -from src.collectors.enums import URLStatus from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.models.creation_info.url import 
URLCreationInfo diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index 1b4d6cb7..c1e27ae3 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -3,7 +3,7 @@ from typing import Optional, Any from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus, SuggestionType, RecordType from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO @@ -20,13 +20,13 @@ from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.duplicate.pydantic.insert import DuplicateInsertInfo from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL -from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.core.enums import URLSource diff --git a/tests/helpers/data_creator/create.py 
b/tests/helpers/data_creator/create.py index 73ad8c63..1c2073fd 100644 --- a/tests/helpers/data_creator/create.py +++ b/tests/helpers/data_creator/create.py @@ -1,6 +1,6 @@ from datetime import datetime -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus, RecordType from src.db import County, Locality, USState from src.db.client.async_ import AsyncDatabaseClient diff --git a/tests/helpers/data_creator/generate.py b/tests/helpers/data_creator/generate.py index 6c5f8071..aa63b202 100644 --- a/tests/helpers/data_creator/generate.py +++ b/tests/helpers/data_creator/generate.py @@ -1,11 +1,10 @@ from datetime import datetime -from src.collectors.enums import URLStatus, CollectorType -from src.core.enums import BatchStatus, RecordType +from src.collectors.enums import CollectorType +from src.core.enums import BatchStatus from src.db.models.impl.batch.pydantic.insert import BatchInsertModel from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.batch_url.pydantic import LinkBatchURLPydantic from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.insert import URLInsertModel diff --git a/tests/helpers/setup/annotation/core.py b/tests/helpers/setup/annotation/core.py index 70123cb9..10bc67b7 100644 --- a/tests/helpers/setup/annotation/core.py +++ b/tests/helpers/setup/annotation/core.py @@ -1,4 +1,3 @@ -from src.collectors.enums import URLStatus from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.setup.annotation.model import AnnotationSetupInfo diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py index 
c474fe2c..20c0f8df 100644 --- a/tests/helpers/setup/final_review/core.py +++ b/tests/helpers/setup/final_review/core.py @@ -1,5 +1,3 @@ -from typing import Optional - from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType diff --git a/tests/helpers/setup/wipe.py b/tests/helpers/setup/wipe.py index f6cd3582..7d4f0672 100644 --- a/tests/helpers/setup/wipe.py +++ b/tests/helpers/setup/wipe.py @@ -1,4 +1,4 @@ -from sqlalchemy import create_engine, Engine +from sqlalchemy import Engine from src.db.models.templates_.base import Base diff --git a/tests/manual/api/test_contributions.py b/tests/manual/api/test_contributions.py index 90d8e8de..6689ffdf 100644 --- a/tests/manual/api/test_contributions.py +++ b/tests/manual/api/test_contributions.py @@ -1,9 +1,9 @@ import pytest -from src.api.endpoints.contributions.leaderboard.query import GetContributionsLeaderboardQueryBuilder from src.api.endpoints.contributions.user.queries.core import GetUserContributionsQueryBuilder from src.db.client.async_ import AsyncDatabaseClient + # 72 = Max # 17 = Josh diff --git a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py index 22203910..6eedb7f0 100644 --- a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py +++ b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py @@ -2,9 +2,9 @@ import dotenv -from src.db.models.impl.batch.pydantic.info import BatchInfo from src.collectors.enums import CollectorType from src.core.enums import BatchStatus +from src.db.models.impl.batch.pydantic.info import BatchInfo def test_auto_googler_collector_lifecycle(test_core): diff --git a/tests/manual/core/lifecycle/test_ckan_lifecycle.py b/tests/manual/core/lifecycle/test_ckan_lifecycle.py index 66020a92..85bfca55 100644 --- a/tests/manual/core/lifecycle/test_ckan_lifecycle.py +++ 
b/tests/manual/core/lifecycle/test_ckan_lifecycle.py @@ -1,8 +1,9 @@ -from src.db.models.impl.batch.pydantic.info import BatchInfo +from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion + from src.collectors import CollectorType -from src.core.enums import BatchStatus from src.collectors.impl.ckan import group_search, package_search, organization_search -from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion +from src.core.enums import BatchStatus +from src.db.models.impl.batch.pydantic.info import BatchInfo def test_ckan_lifecycle(test_core): diff --git a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py index 216638dc..c78a8199 100644 --- a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py +++ b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py @@ -1,9 +1,10 @@ -from src.db.models.impl.batch.pydantic.info import BatchInfo -from src.collectors import CollectorType -from src.core.enums import BatchStatus from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion from test_automated.integration.core.helpers.constants import ALLEGHENY_COUNTY_MUCKROCK_ID, ALLEGHENY_COUNTY_TOWN_NAMES +from src.collectors import CollectorType +from src.core.enums import BatchStatus +from src.db.models.impl.batch.pydantic.info import BatchInfo + def test_muckrock_simple_search_collector_lifecycle(test_core): ci = test_core diff --git a/tests/manual/core/tasks/scheduled/test_push_to_huggingface.py b/tests/manual/core/tasks/scheduled/test_push_to_huggingface.py index a091ff5c..3a864bae 100644 --- a/tests/manual/core/tasks/scheduled/test_push_to_huggingface.py +++ b/tests/manual/core/tasks/scheduled/test_push_to_huggingface.py @@ -1,8 +1,6 @@ import pytest - from environs import Env -from src.core.env_var_manager import EnvVarManager from 
src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.hub.client import HuggingFaceHubClient diff --git a/tests/manual/external/huggingface/inference/test_relevancy.py b/tests/manual/external/huggingface/inference/test_relevancy.py index e001d864..abe4296b 100644 --- a/tests/manual/external/huggingface/inference/test_relevancy.py +++ b/tests/manual/external/huggingface/inference/test_relevancy.py @@ -1,12 +1,11 @@ import pytest from aiohttp import ClientSession +from environs import Env from src.external.huggingface.inference.client import HuggingFaceInferenceClient from src.external.huggingface.inference.models.input import BasicInput from tests.manual.external.huggingface.inference.constants import EXAMPLE_WEBSITE -from environs import Env - @pytest.mark.asyncio async def test_huggingface_inference_relevancy_annotation(): diff --git a/tests/manual/external/internet_archive/test_search.py b/tests/manual/external/internet_archive/test_search.py index 930d0304..41dcee1f 100644 --- a/tests/manual/external/internet_archive/test_search.py +++ b/tests/manual/external/internet_archive/test_search.py @@ -2,7 +2,6 @@ from aiohttp import ClientSession from src.external.internet_archives.client import InternetArchivesClient -from src.external.internet_archives.models.capture import IACapture # BASE_URL = "nola.gov/getattachment/NOPD/Policies/Chapter-12-1-Department-Operations-Manual-EFFECTIVE-1-14-18.pdf/" BASE_URL = "example.com" diff --git a/tests/manual/llm_api_logic/test_deepseek_record_classifier.py b/tests/manual/llm_api_logic/test_deepseek_record_classifier.py index f26f2a6f..0ec3ba16 100644 --- a/tests/manual/llm_api_logic/test_deepseek_record_classifier.py +++ b/tests/manual/llm_api_logic/test_deepseek_record_classifier.py @@ -1,7 +1,7 @@ import pytest -from src.db.dtos.url.html_content import URLHTMLContentInfo from 
src.core.tasks.url.operators.record_type.llm_api.record_classifier.deepseek import DeepSeekRecordClassifier +from src.db.dtos.url.html_content import URLHTMLContentInfo @pytest.mark.asyncio diff --git a/tests/manual/llm_api_logic/test_openai_record_classifier.py b/tests/manual/llm_api_logic/test_openai_record_classifier.py index 3b3ec08b..25208b63 100644 --- a/tests/manual/llm_api_logic/test_openai_record_classifier.py +++ b/tests/manual/llm_api_logic/test_openai_record_classifier.py @@ -1,7 +1,7 @@ import pytest -from src.db.dtos.url.html_content import URLHTMLContentInfo from src.core.tasks.url.operators.record_type.llm_api.record_classifier.openai import OpenAIRecordClassifier +from src.db.dtos.url.html_content import URLHTMLContentInfo @pytest.mark.asyncio diff --git a/tests/manual/source_collectors/test_autogoogler_collector.py b/tests/manual/source_collectors/test_autogoogler_collector.py index 39d1f8e7..e0f609cb 100644 --- a/tests/manual/source_collectors/test_autogoogler_collector.py +++ b/tests/manual/source_collectors/test_autogoogler_collector.py @@ -1,13 +1,14 @@ from unittest.mock import AsyncMock import pytest +from environs import Env +from src.collectors.impl.auto_googler.collector import AutoGooglerCollector from src.collectors.impl.auto_googler.dtos.input import AutoGooglerInputDTO from src.core.env_var_manager import EnvVarManager from src.core.logger import AsyncCoreLogger -from src.collectors.impl.auto_googler.collector import AutoGooglerCollector from src.db.client.async_ import AsyncDatabaseClient -from environs import Env + @pytest.mark.asyncio async def test_autogoogler_collector(monkeypatch): diff --git a/tests/manual/source_collectors/test_ckan_collector.py b/tests/manual/source_collectors/test_ckan_collector.py index 9b5edc9f..753c8a30 100644 --- a/tests/manual/source_collectors/test_ckan_collector.py +++ b/tests/manual/source_collectors/test_ckan_collector.py @@ -4,9 +4,8 @@ from marshmallow import Schema, fields from 
src.collectors.impl.ckan.collector import CKANCollector -from src.core.logger import AsyncCoreLogger -from src.collectors.impl.ckan import collector from src.collectors.impl.ckan.dtos.input import CKANInputDTO +from src.core.logger import AsyncCoreLogger class CKANSchema(Schema): diff --git a/tests/manual/source_collectors/test_common_crawler_collector.py b/tests/manual/source_collectors/test_common_crawler_collector.py index e508c2ac..61e6fdbc 100644 --- a/tests/manual/source_collectors/test_common_crawler_collector.py +++ b/tests/manual/source_collectors/test_common_crawler_collector.py @@ -3,9 +3,8 @@ import pytest from marshmallow import Schema, fields -from src.core.logger import AsyncCoreLogger -from src.collectors.impl.common_crawler import collector from src.collectors.impl.common_crawler import CommonCrawlerInputDTO +from src.core.logger import AsyncCoreLogger class CommonCrawlerSchema(Schema): diff --git a/tests/manual/source_collectors/test_muckrock_collectors.py b/tests/manual/source_collectors/test_muckrock_collectors.py index d8153c6b..0a69cfc0 100644 --- a/tests/manual/source_collectors/test_muckrock_collectors.py +++ b/tests/manual/source_collectors/test_muckrock_collectors.py @@ -3,16 +3,17 @@ import pytest from marshmallow import Schema, fields -from src.core.logger import AsyncCoreLogger +from src.collectors.impl import MuckrockSimpleSearchCollector, \ + MuckrockCountyLevelSearchCollector, MuckrockAllFOIARequestsCollector from src.collectors.impl.muckrock.collectors.all_foia.dto import MuckrockAllFOIARequestsCollectorInputDTO from src.collectors.impl.muckrock.collectors.county.dto import MuckrockCountySearchCollectorInputDTO from src.collectors.impl.muckrock.collectors.simple.dto import MuckrockSimpleSearchCollectorInputDTO -from src.collectors.impl import MuckrockSimpleSearchCollector, \ - MuckrockCountyLevelSearchCollector, MuckrockAllFOIARequestsCollector +from src.core.logger import AsyncCoreLogger from src.db.client.async_ import 
AsyncDatabaseClient from tests.automated.integration.core.helpers.constants import ALLEGHENY_COUNTY_MUCKROCK_ID, \ ALLEGHENY_COUNTY_TOWN_NAMES + class MuckrockURLInfoSchema(Schema): url = fields.String(required=True) metadata = fields.Dict(required=True) diff --git a/tests/manual/unsorted/test_common_crawler_integration.py b/tests/manual/unsorted/test_common_crawler_integration.py index 4b79893a..d458079d 100644 --- a/tests/manual/unsorted/test_common_crawler_integration.py +++ b/tests/manual/unsorted/test_common_crawler_integration.py @@ -1,10 +1,7 @@ import csv -import datetime -import json import os import shutil import tempfile -from unittest.mock import patch import pytest from common_crawler.cache import CommonCrawlerCacheManager From 3a344bdc9c37aa604215c1ccd061f9db80167489 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 1 Jan 2026 08:24:07 -0500 Subject: [PATCH 23/24] Tighten record type requirement for prerequisite --- src/api/endpoints/url/get/query.py | 1 - .../impl/data_sources/update/queries/cte.py | 2 +- src/core/tasks/url/manager.py | 9 ++++++--- src/db/models/impl/__init__.py | 3 ++- .../core/tasks/scheduled/ds_app_sync/__init__.py | 0 .../scheduled/ds_app_sync/data_sources/__init__.py | 0 .../ds_app_sync/data_sources/update/__init__.py | 0 .../ds_app_sync/data_sources/update/test_prereq.py | 13 +++++++++++++ 8 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 tests/manual/core/tasks/scheduled/ds_app_sync/__init__.py create mode 100644 tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/__init__.py create mode 100644 tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/__init__.py create mode 100644 tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/test_prereq.py diff --git a/src/api/endpoints/url/get/query.py b/src/api/endpoints/url/get/query.py index a11bbd64..d9ba9047 100644 --- a/src/api/endpoints/url/get/query.py +++ b/src/api/endpoints/url/get/query.py @@ -3,7 +3,6 @@ from sqlalchemy 
import select, exists, RowMapping, func from sqlalchemy.dialects.postgresql import aggregate_order_by from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload from src.api.endpoints.url.get.dto import GetURLsResponseInfo, GetURLsResponseErrorInfo, GetURLsResponseInnerInfo from src.db.client.helpers import add_standard_limit_and_offset diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py index 8f0ff65e..b1c21474 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py @@ -18,7 +18,7 @@ def __init__(self): URL, URL.id == DSAppLinkDataSource.url_id, ) - .outerjoin( + .join( URLRecordType, URL.id == URLRecordType.url_id, ) diff --git a/src/core/tasks/url/manager.py b/src/core/tasks/url/manager.py index 7fc6b4e3..b3da8edd 100644 --- a/src/core/tasks/url/manager.py +++ b/src/core/tasks/url/manager.py @@ -52,9 +52,7 @@ async def _run_task(self, entry: URLTaskEntry) -> None: while meets_prereq: print(f"Running {operator.task_type.value} Task") if count > TASK_REPEAT_THRESHOLD: - message = f"Task {operator.task_type.value} has been run more than {TASK_REPEAT_THRESHOLD} times in a row. Task loop terminated." - print(message) - await self.handler.post_to_discord(message=message) + await self._alert_task_repeat_threshold_exceeded(operator) break run_info: TaskOperatorRunInfo = await operator.run_task() await self.conclude_task(run_info) @@ -63,6 +61,11 @@ async def _run_task(self, entry: URLTaskEntry) -> None: count += 1 meets_prereq = await operator.meets_task_prerequisites() + async def _alert_task_repeat_threshold_exceeded(self, operator): + message = f"Task {operator.task_type.value} has been run more than {TASK_REPEAT_THRESHOLD} times in a row. Task loop terminated." 
+ print(message) + await self.handler.post_to_discord(message=message) + async def trigger_task_run(self) -> None: await self.task_trigger.trigger_or_rerun() diff --git a/src/db/models/impl/__init__.py b/src/db/models/impl/__init__.py index 9e679b72..5b4c9604 100644 --- a/src/db/models/impl/__init__.py +++ b/src/db/models/impl/__init__.py @@ -1,3 +1,4 @@ from .link.location_batch.sqlalchemy import LinkLocationBatch -from .link.batch_url.sqlalchemy import LinkBatchURL \ No newline at end of file +from .link.batch_url.sqlalchemy import LinkBatchURL +from .anon_session.sqlalchemy import AnonymousSession \ No newline at end of file diff --git a/tests/manual/core/tasks/scheduled/ds_app_sync/__init__.py b/tests/manual/core/tasks/scheduled/ds_app_sync/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/__init__.py b/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/__init__.py b/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/test_prereq.py b/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/test_prereq.py new file mode 100644 index 00000000..a14dc917 --- /dev/null +++ b/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/test_prereq.py @@ -0,0 +1,13 @@ +import pytest + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.prereq import \ + DSAppSyncDataSourcesUpdatePrerequisitesQueryBuilder +from src.db.client.async_ import AsyncDatabaseClient + + +@pytest.mark.asyncio +async def test_prereq(adb_client_test: AsyncDatabaseClient): + await adb_client_test.run_query_builder( + DSAppSyncDataSourcesUpdatePrerequisitesQueryBuilder() + ) + From 
f380eb505d0c25c2911fbc90c9b26c82852da1f0 Mon Sep 17 00:00:00 2001 From: maxachis Date: Sun, 4 Jan 2026 08:47:04 -0500 Subject: [PATCH 24/24] Fix broken import --- tests/automated/unit/security_manager/test_security_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/automated/unit/security_manager/test_security_manager.py b/tests/automated/unit/security_manager/test_security_manager.py index 23138e47..42ae8e4d 100644 --- a/tests/automated/unit/security_manager/test_security_manager.py +++ b/tests/automated/unit/security_manager/test_security_manager.py @@ -6,7 +6,7 @@ from src.security.dtos.access_info import AccessInfo from src.security.enums import Permissions -from src.security.manager import SecurityManager, get_access_info +from src.security.manager import SecurityManager, get_admin_access_info SECRET_KEY = "test_secret_key" VALID_TOKEN = "valid_token"