diff --git a/ENV.md b/ENV.md index 386dbdae..6ad39c02 100644 --- a/ENV.md +++ b/ENV.md @@ -80,6 +80,7 @@ Note that some tasks/subtasks are themselves enabled by other tasks. | `DS_APP_SYNC_META_URL_ADD_TASK_FLAG` | Adds new meta URLs to the Data Sources App| | `DS_APP_SYNC_META_URL_UPDATE_TASK_FLAG` | Updates existing meta URLs in the Data Sources App| | `DS_APP_SYNC_META_URL_DELETE_TASK_FLAG` | Deletes meta URLs in the Data Sources App| +| `DS_APP_SYNC_USER_FOLLOWS_GET_TASK_FLAG` | Gets user follows from the Data Sources App| | `INTEGRITY_MONITOR_TASK_FLAG` | Runs integrity checks. | ### URL Task Flags diff --git a/alembic/versions/2025_12_05_1721-dfb64594049f_create_anonymous_annotation_name.py b/alembic/versions/2025_12_05_1721-dfb64594049f_create_anonymous_annotation_name.py new file mode 100644 index 00000000..848b9e98 --- /dev/null +++ b/alembic/versions/2025_12_05_1721-dfb64594049f_create_anonymous_annotation_name.py @@ -0,0 +1,47 @@ +"""Create anonymous_annotation_name + +Revision ID: dfb64594049f +Revises: 1d3398f9cd8a +Create Date: 2025-12-05 17:21:35.134935 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +from src.util.alembic_helpers import created_at_column + +# revision identifiers, used by Alembic. 
revision: str = 'dfb64594049f'
down_revision: Union[str, None] = '1d3398f9cd8a'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the link table between anonymous sessions and name suggestions.

    Each row references one ``anonymous_sessions`` row and one
    ``url_name_suggestions`` row; the pair forms the composite primary key.
    """
    session_ref = sa.Column(
        "session_id",
        UUID,
        sa.ForeignKey("anonymous_sessions.id"),
        nullable=False,
    )
    suggestion_ref = sa.Column(
        "suggestion_id",
        sa.Integer(),
        sa.ForeignKey("url_name_suggestions.id"),
        nullable=False,
    )
    composite_key = sa.PrimaryKeyConstraint("session_id", "suggestion_id")

    op.create_table(
        "link__anonymous_sessions__name_suggestions",
        session_ref,
        suggestion_ref,
        created_at_column(),
        composite_key,
    )


def downgrade() -> None:
    """No-op: this revision does not define a downgrade path."""
revision: str = '9292faed37fd'
down_revision: Union[str, None] = 'dfb64594049f'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Maps each legacy table name (key) to its new ``annotation__*`` name (value).
OLD_NEW_TABLE_MAPPING = {
    # Anonymous Suggestions
    "anonymous_annotation_agency": "annotation__agency__anon",
    "anonymous_annotation_location": "annotation__location__anon",
    "anonymous_annotation_record_type": "annotation__record_type__anon",
    "anonymous_annotation_url_type": "annotation__url_type__anon",
    # User Suggestions
    "user_url_agency_suggestions": "annotation__agency__user",
    "user_location_suggestions": "annotation__location__user",
    "user_record_type_suggestions": "annotation__record_type__user",
    "user_url_type_suggestions": "annotation__url_type__user",
    # Auto suggestions
    "auto_location_id_subtasks": "annotation__location__auto__subtasks",
    "location_id_subtask_suggestions": "annotation__location__auto__suggestions",
    "url_auto_agency_id_subtasks": "annotation__agency__auto__subtasks",
    "agency_id_subtask_suggestions": "annotation__agency__auto__suggestions",
    "auto_record_type_suggestions": "annotation__record_type__auto",
    "auto_relevant_suggestions": "annotation__url_type__auto",
    # Name suggestions
    "url_name_suggestions": "annotation__name__suggestions",
    "link__anonymous_sessions__name_suggestions": "annotation__name__anon__endorsements",
    "link_user_name_suggestions": "annotation__name__user__endorsements",
}


def upgrade() -> None:
    """Rename every table listed in ``OLD_NEW_TABLE_MAPPING``, in mapping order."""
    for legacy_name in OLD_NEW_TABLE_MAPPING:
        op.rename_table(legacy_name, OLD_NEW_TABLE_MAPPING[legacy_name])


def downgrade() -> None:
    """No-op: this revision does not define a downgrade path."""
b/alembic/versions/2025_12_21_1957-30ee666f15d1_add_pending_agencies_tables.py @@ -0,0 +1,112 @@ +"""Add pending agencies tables + +Revision ID: 30ee666f15d1 +Revises: 9292faed37fd +Create Date: 2025-12-21 19:57:58.199838 + +Design notes: + +After debating it internally, I elected to have a separate pending agencies table, +rather than adding an `approval status` column to the agencies table. + +This is for a few reasons: + 1. Many existing queries and models rely on the current agency setup, + and would need to be retrofitted in order to filter + approved and unapproved agencies. + 2. Some existing links, such as between agencies and batches, between agencies and URLs, + or agency annotations for URLs, would not make sense for pending agencies, + and would be difficult to prevent in the database. + +This setup does, however, make it more difficult to check for duplicates between +existing agencies and pending agencies. However, I concluded it was better for +pending agencies to be negatively affected by these design choices than +for existing agencies to be affected by the above design choices. + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import id_column, created_at_column, enum_column, agency_id_column + +# revision identifiers, used by Alembic. 
revision: str = '30ee666f15d1'
down_revision: Union[str, None] = '9292faed37fd'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the three agency-proposal tables.

    ``proposal__agencies`` is created first because the other two tables
    declare foreign keys to ``proposal__agencies.id``.
    """
    _create_proposed_agency_table()
    _create_proposed_agency_location_table()
    _create_proposed_agency_decision_info_table()


def _create_proposed_agency_decision_info_table():
    """Create ``proposal__agencies__decision_info``.

    The primary key is ``proposal_agency_id`` alone, so a proposal can have
    at most one decision row.
    """
    op.create_table(
        "proposal__agencies__decision_info",
        sa.Column(
            "proposal_agency_id",
            sa.Integer(),
            sa.ForeignKey("proposal__agencies.id"),
            nullable=False
        ),
        sa.Column("deciding_user_id", sa.Integer),
        sa.Column("rejection_reason", sa.String(), nullable=True),
        created_at_column(),
        sa.PrimaryKeyConstraint("proposal_agency_id")
    )


def _create_proposed_agency_table():
    """Create the ``proposal_status_enum`` type and ``proposal__agencies``."""
    # The enum type must exist before the table column that references it.
    op.execute("CREATE TYPE proposal_status_enum AS ENUM ('pending', 'approved', 'rejected');")

    op.create_table(
        "proposal__agencies",
        id_column(),
        sa.Column("name", sa.String(), nullable=False),
        enum_column(
            column_name="agency_type",
            enum_name="agency_type_enum",
        ),
        enum_column(
            column_name="jurisdiction_type",
            enum_name="jurisdiction_type_enum"
        ),
        sa.Column("proposing_user_id", sa.Integer(), nullable=True),
        sa.Column(
            "promoted_agency_id",
            sa.Integer(),
            sa.ForeignKey(
                "agencies.id"
            )
        ),
        enum_column(
            column_name="proposal_status",
            enum_name="proposal_status_enum",
        ),
        created_at_column(),
        # A proposal may reference a promoted agency only once it has been
        # approved. The previous condition compared against 'pending', which
        # would have rejected exactly the rows that should carry a
        # promoted_agency_id (approved proposals).
        # NOTE(review): confirm against the approval flow that 'approved' is
        # the status set together with promoted_agency_id.
        sa.CheckConstraint(
            "promoted_agency_id IS NULL OR proposal_status = 'approved'",
            name="ck_agency_id_or_proposal_status"
        )
    )


def _create_proposed_agency_location_table():
    """Create ``proposal__link__agencies__locations``.

    Links a proposed agency to a location; the pair is the composite
    primary key, so each link can exist only once.
    """
    op.create_table(
        "proposal__link__agencies__locations",
        sa.Column(
            "proposal_agency_id",
            sa.Integer(),
            sa.ForeignKey("proposal__agencies.id"),
            nullable=False,
        ),
        sa.Column(
            "location_id",
            sa.Integer(),
            sa.ForeignKey("locations.id"),
            nullable=False
        ),
        created_at_column(),
        sa.PrimaryKeyConstraint("proposal_agency_id", "location_id")
    )


def downgrade() -> None:
    """No-op: this revision does not define a downgrade path."""
    pass
revision: str = 'e88e4e962dc7'
down_revision: Union[str, None] = '30ee666f15d1'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the user-follows link table, then register the sync task type."""
    for step in (
        _add_link_locations_user_follows_table,
        _add_follows_sync_task,
    ):
        step()


def _add_link_locations_user_follows_table():
    """Create ``link__locations__user_follows`` keyed on (location_id, user_id)."""
    table_name = "link__locations__user_follows"
    columns = [
        location_id_column(),
        user_id_column(),
        created_at_column(),
    ]
    primary_key = sa.PrimaryKeyConstraint("location_id", "user_id")
    op.create_table(table_name, *columns, primary_key)


def _add_follows_sync_task():
    """Add the ``Sync User Follows Get`` value to the ``task_type`` enum."""
    new_value = "Sync User Follows Get"
    add_enum_value(
        enum_name="task_type",
        enum_value=new_value,
    )


def downgrade() -> None:
    """No-op: this revision does not define a downgrade path."""
# revision identifiers, used by Alembic.
revision: str = '42933d84aa52'
down_revision: Union[str, None] = 'e88e4e962dc7'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Definition of ``url_annotation_count_view``.
#
# One CTE per (source, annotation kind) pair counts annotation rows per URL.
# The final SELECT LEFT JOINs every CTE back onto ``urls`` so that URLs with
# no annotations of a given kind still appear, with the count coalesced to 0.
#
# Every LEFT JOIN must be correlated on *its own* alias. The anon joins
# previously reused the ``user_*`` aliases in their ON clauses, which joined
# the anon CTEs uncorrelated to the URL: rows were multiplied and the anon /
# total counts were wrong.
URL_ANNOTATION_COUNT_VIEW_SQL: str = """
    CREATE VIEW url_annotation_count_view AS
    WITH
    auto_location_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__location__auto__subtasks anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , auto_agency_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__agency__auto__subtasks anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , auto_url_type_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__url_type__auto anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , auto_record_type_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__record_type__auto anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , user_location_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__location__user anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , user_agency_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__agency__user anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , user_url_type_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__url_type__user anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , user_record_type_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__record_type__user anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , anon_location_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__location__anon anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , anon_agency_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__agency__anon anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , anon_url_type_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__url_type__anon anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    , anon_record_type_count AS (
        SELECT u_1.id, count(anno.url_id) AS cnt
        FROM urls u_1
        JOIN annotation__record_type__anon anno ON u_1.id = anno.url_id
        GROUP BY u_1.id
    )
    SELECT
        u.id AS url_id,
        COALESCE(auto_ag.cnt, 0::bigint) AS auto_agency_count,
        COALESCE(auto_loc.cnt, 0::bigint) AS auto_location_count,
        COALESCE(auto_rec.cnt, 0::bigint) AS auto_record_type_count,
        COALESCE(auto_typ.cnt, 0::bigint) AS auto_url_type_count,
        COALESCE(user_ag.cnt, 0::bigint) AS user_agency_count,
        COALESCE(user_loc.cnt, 0::bigint) AS user_location_count,
        COALESCE(user_rec.cnt, 0::bigint) AS user_record_type_count,
        COALESCE(user_typ.cnt, 0::bigint) AS user_url_type_count,
        COALESCE(anon_ag.cnt, 0::bigint) AS anon_agency_count,
        COALESCE(anon_loc.cnt, 0::bigint) AS anon_location_count,
        COALESCE(anon_rec.cnt, 0::bigint) AS anon_record_type_count,
        COALESCE(anon_typ.cnt, 0::bigint) AS anon_url_type_count,
        COALESCE(auto_ag.cnt, 0::bigint) + COALESCE(auto_loc.cnt, 0::bigint)
            + COALESCE(auto_rec.cnt, 0::bigint) + COALESCE(auto_typ.cnt, 0::bigint)
            + COALESCE(user_ag.cnt, 0::bigint) + COALESCE(user_loc.cnt, 0::bigint)
            + COALESCE(user_rec.cnt, 0::bigint) + COALESCE(user_typ.cnt, 0::bigint)
            + COALESCE(anon_ag.cnt, 0::bigint) + COALESCE(anon_loc.cnt, 0::bigint)
            + COALESCE(anon_rec.cnt, 0::bigint) + COALESCE(anon_typ.cnt, 0::bigint)
            AS total_anno_count
    FROM
        urls u
        LEFT JOIN auto_agency_count auto_ag ON auto_ag.id = u.id
        LEFT JOIN auto_location_count auto_loc ON auto_loc.id = u.id
        LEFT JOIN auto_record_type_count auto_rec ON auto_rec.id = u.id
        LEFT JOIN auto_url_type_count auto_typ ON auto_typ.id = u.id
        LEFT JOIN user_agency_count user_ag ON user_ag.id = u.id
        LEFT JOIN user_location_count user_loc ON user_loc.id = u.id
        LEFT JOIN user_record_type_count user_rec ON user_rec.id = u.id
        LEFT JOIN user_url_type_count user_typ ON user_typ.id = u.id
        LEFT JOIN anon_agency_count anon_ag ON anon_ag.id = u.id
        LEFT JOIN anon_location_count anon_loc ON anon_loc.id = u.id
        LEFT JOIN anon_record_type_count anon_rec ON anon_rec.id = u.id
        LEFT JOIN anon_url_type_count anon_typ ON anon_typ.id = u.id
"""


def upgrade() -> None:
    """Recreate ``url_annotation_count_view`` against the renamed tables.

    The existing view (if any) is dropped first, then rebuilt from
    ``URL_ANNOTATION_COUNT_VIEW_SQL`` so that it also exposes per-URL counts
    of anonymous annotations and includes them in ``total_anno_count``.
    """
    op.execute("""DROP VIEW IF EXISTS url_annotation_count_view""")
    op.execute(URL_ANNOTATION_COUNT_VIEW_SQL)


def downgrade() -> None:
    """No-op: this revision does not define a downgrade path."""
    pass
revision: str = '759ce7d0772b'
down_revision: Union[str, None] = '42933d84aa52'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Drop the ``status`` column from ``urls``, then drop the ``url_status`` type."""
    # The column is removed before the type.
    # NOTE(review): presumably ``urls.status`` was the last user of
    # ``url_status``; confirm no other column still depends on it.
    op.drop_column("urls", "status")

    drop_type_sql = """DROP type url_status"""
    op.execute(drop_type_sql)


def downgrade() -> None:
    """No-op: this revision does not define a downgrade path."""
+36,9 @@ async def get_agencies( @agencies_router.post("") async def create_agency( request: AgencyPostRequest, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), + ) -> AgencyPostResponse: return await async_core.adb_client.run_query_builder( AddAgencyQueryBuilder(request=request) @@ -45,6 +49,7 @@ async def delete_agency( agency_id: int = Path( description="Agency ID to delete" ), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await async_core.adb_client.run_query_builder( @@ -58,6 +63,7 @@ async def update_agency( agency_id: int = Path( description="Agency ID to update" ), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await async_core.adb_client.run_query_builder( @@ -84,6 +90,7 @@ async def add_location_to_agency( location_id: int = Path( description="Location ID to add" ), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await async_core.adb_client.run_query_builder( @@ -99,6 +106,7 @@ async def remove_location_from_agency( location_id: int = Path( description="Location ID to remove" ), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await async_core.adb_client.run_query_builder( diff --git a/src/api/endpoints/annotate/_shared/extract.py b/src/api/endpoints/annotate/_shared/extract.py index 3fb7770b..4a7517eb 100644 --- a/src/api/endpoints/annotate/_shared/extract.py +++ b/src/api/endpoints/annotate/_shared/extract.py @@ -16,8 +16,8 @@ from src.api.endpoints.annotate.all.get.queries.name.core import GetNameSuggestionsQueryBuilder from src.db.dto_converter import DTOConverter from src.db.dtos.url.mapping_.simple import SimpleURLMapping +from 
src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion async def extract_and_format_get_annotation_result( @@ -28,18 +28,25 @@ async def extract_and_format_get_annotation_result( html_response_info = DTOConverter.html_content_list_to_html_response_info( url.html_content ) + # URL Types url_type_suggestions: list[URLTypeAnnotationSuggestion] = \ convert_user_url_type_suggestion_to_url_type_annotation_suggestion( - url.user_relevant_suggestions + url.user_url_type_suggestions, + url.anon_url_type_suggestions ) + # Record Types record_type_suggestions: RecordTypeAnnotationResponseOuterInfo = \ convert_user_record_type_suggestion_to_record_type_annotation_suggestion( - url.user_record_type_suggestions + url.user_record_type_suggestions, + url.anon_record_type_suggestions ) + # Agencies agency_suggestions: AgencyAnnotationResponseOuterInfo = \ await GetAgencySuggestionsQueryBuilder(url_id=url.id).run(session) + # Locations location_suggestions: LocationAnnotationResponseOuterInfo = \ await GetLocationSuggestionsQueryBuilder(url_id=url.id).run(session) + # Names name_suggestions: NameAnnotationResponseOuterInfo = \ await GetNameSuggestionsQueryBuilder(url_id=url.id).run(session) return GetNextURLForAllAnnotationResponse( @@ -55,7 +62,7 @@ async def extract_and_format_get_annotation_result( batch_info=await GetAnnotationBatchInfoQueryBuilder( batch_id=batch_id, models=[ - UserURLAgencySuggestion, + AnnotationAgencyUser, ] ).run(session), location_suggestions=location_suggestions, diff --git a/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py b/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py index 5a56cf32..b9fcc935 100644 --- a/src/api/endpoints/annotate/_shared/queries/get_annotation_batch_info.py +++ 
def add_joins(query: Select) -> Select:
    """Inner-join the annotation flag and annotation count views onto ``URL``."""
    with_flags = query.join(
        URLAnnotationFlagsView,
        URLAnnotationFlagsView.url_id == URL.id,
    )
    return with_flags.join(
        URLAnnotationCount,
        URLAnnotationCount.url_id == URL.id,
    )


def add_common_where_conditions(
    query: Select,
) -> Select:
    """Restrict ``query`` to URLs that are not suspended and are unvalidated."""
    not_suspended = not_exists_url(FlagURLSuspended)
    # URL must be unvalidated
    is_unvalidated = exists_url(UnvalidatedURL)
    return query.where(not_suspended, is_unvalidated)


def add_load_options(
    query: Select
) -> Select:
    """Attach joined-eager loading for the URL relationships listed below."""
    eager_relationships = (
        URL.html_content,
        URL.user_url_type_suggestions,
        URL.user_record_type_suggestions,
        URL.anon_record_type_suggestions,
        URL.anon_url_type_suggestions,
    )
    loaders = [joinedload(relationship) for relationship in eager_relationships]
    return query.options(*loaders)


def bool_sort(
    condition: ColumnElement[bool]
) -> ColumnElement[int]:
    """Build an ascending sort key that places rows matching ``condition`` first.

    Matching rows map to 0 and every other row to 1.
    """
    rank = case(
        (condition, 0),
        else_=1,
    )
    return rank.asc()


def common_sorts(
    base_cte: CTE
) -> list[ColumnElement[int]]:
    """Return the shared ORDER BY terms, from highest to lowest priority."""
    # Privilege URLs whose batches are associated with locations
    # followed by ANY user
    followed_first = bool_sort(base_cte.c.followed_by_any_user)
    # Privilege manually submitted URLs
    manual_first = bool_sort(URL.source == URLSource.MANUAL)
    # Privilege by annotation counts: user, then anon, then auto.
    # Only the URL-type counts are used for each source.
    most_user_annotations = URLAnnotationCount.user_url_type_count.desc()
    most_anon_annotations = URLAnnotationCount.anon_url_type_count.desc()
    most_auto_annotations = URLAnnotationCount.auto_url_type_count.desc()
    # Break remaining ties by favoring the least recently created URLs
    oldest_first = URL.id.asc()

    return [
        followed_first,
        manual_first,
        most_user_annotations,
        most_anon_annotations,
        most_auto_annotations,
        oldest_first,
    ]
LinkUserSuggestionAgencyNotFound -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.templates.requester import RequesterBase @@ -36,10 +37,13 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: .where( or_( exists_url( - UserURLAgencySuggestion + AnnotationAgencyUser ), exists_url( - URLAutoAgencyIDSubtask + AnnotationAgencyAutoSubtask + ), + exists_url( + AnnotationAgencyAnon ) ) ) @@ -49,34 +53,48 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: # Number of users who suggested each agency user_suggestions_cte = ( select( - UserURLAgencySuggestion.url_id, - UserURLAgencySuggestion.agency_id, - func.count(UserURLAgencySuggestion.user_id).label('user_count') + AnnotationAgencyUser.url_id, + AnnotationAgencyUser.agency_id, + func.count(AnnotationAgencyUser.user_id).label('user_count') ) .group_by( - UserURLAgencySuggestion.agency_id, - UserURLAgencySuggestion.url_id, + AnnotationAgencyUser.agency_id, + AnnotationAgencyUser.url_id, ) .cte("user_suggestions") ) + # Number of anon users who suggested each agency + anon_suggestions_cte = ( + select( + AnnotationAgencyAnon.url_id, + AnnotationAgencyAnon.agency_id, + func.count(AnnotationAgencyAnon.session_id).label('anon_count') + ) + .group_by( + AnnotationAgencyAnon.agency_id, + AnnotationAgencyAnon.url_id, + ) + .cte("anon_suggestions") + ) + # Maximum confidence of robo annotation, if any robo_suggestions_cte = ( select( - URLAutoAgencyIDSubtask.url_id, + AnnotationAgencyAutoSubtask.url_id, Agency.id.label("agency_id"), - func.max(AgencyIDSubtaskSuggestion.confidence).label('robo_confidence') + func.max(AnnotationAgencyAutoSuggestion.confidence).label('robo_confidence') ) .join( - AgencyIDSubtaskSuggestion, - AgencyIDSubtaskSuggestion.subtask_id 
== URLAutoAgencyIDSubtask.id + AnnotationAgencyAutoSuggestion, + AnnotationAgencyAutoSuggestion.subtask_id == AnnotationAgencyAutoSubtask.id ) .join( Agency, - Agency.id == AgencyIDSubtaskSuggestion.agency_id + Agency.id == AnnotationAgencyAutoSuggestion.agency_id ) .group_by( - URLAutoAgencyIDSubtask.url_id, + AnnotationAgencyAutoSubtask.url_id, Agency.id ) .cte("robo_suggestions") @@ -88,6 +106,7 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: Agency.name.label("display_name"), func.coalesce(user_suggestions_cte.c.user_count, 0).label('user_count'), func.coalesce(robo_suggestions_cte.c.robo_confidence, 0).label('robo_confidence'), + func.coalesce(anon_suggestions_cte.c.anon_count, 0).label('anon_count'), ) .join( Agency, @@ -100,6 +119,13 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: user_suggestions_cte.c.agency_id == Agency.id ) ) + .outerjoin( + anon_suggestions_cte, + and_( + anon_suggestions_cte.c.url_id == self.url_id, + anon_suggestions_cte.c.agency_id == Agency.id + ) + ) .outerjoin( robo_suggestions_cte, and_( @@ -110,7 +136,8 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: .where( or_( user_suggestions_cte.c.user_count > 0, - robo_suggestions_cte.c.robo_confidence > 0 + robo_suggestions_cte.c.robo_confidence > 0, + anon_suggestions_cte.c.anon_count > 0 ) ) ) @@ -119,7 +146,10 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]: mappings: Sequence[RowMapping] = await self.mappings(joined_suggestions_query) suggestions: list[SuggestionModel] = [ SuggestionModel( - **mapping + id=mapping["id"], + display_name=mapping["display_name"], + user_count=mapping['user_count'] + (mapping['anon_count'] // 2), + robo_confidence=mapping["robo_confidence"] ) for mapping in mappings ] diff --git a/src/api/endpoints/annotate/all/get/queries/convert.py b/src/api/endpoints/annotate/all/get/queries/convert.py index fe9b0777..fedfa8a2 100644 --- 
def convert_user_url_type_suggestion_to_url_type_annotation_suggestion(
    user_suggestions: list[AnnotationURLTypeUser],
    anon_suggestions: list[AnnotationURLTypeAnon]
) -> list[URLTypeAnnotationSuggestion]:
    """Summarise URL-type annotations as the three most-endorsed URL types.

    Each user annotation counts as one endorsement and each anonymous
    annotation as half of one. The reported ``endorsement_count`` is the
    weighted total rounded down.
    """
    # Tally user annotations first so that tie ordering follows first
    # appearance among user suggestions, then anonymous ones.
    weighted_votes: Counter[URLType] = Counter(
        user_anno.type for user_anno in user_suggestions
    )
    for anon_anno in anon_suggestions:
        weighted_votes[anon_anno.url_type] += 0.5

    return [
        URLTypeAnnotationSuggestion(
            url_type=candidate,
            endorsement_count=math.floor(weight),
        )
        for candidate, weight in weighted_votes.most_common(3)
    ]
list[AnnotationRecordTypeUser], + anon_suggestions: list[AnnotationRecordTypeAnon] ) -> RecordTypeAnnotationResponseOuterInfo: counter: Counter[RecordType] = Counter() - for suggestion in db_suggestions: + for suggestion in user_suggestions: counter[suggestion.record_type] += 1 + for suggestion in anon_suggestions: + counter[suggestion.record_type] += 0.5 + suggestions: list[RecordTypeSuggestionModel] = [] for record_type, endorsement_count in counter.most_common(3): suggestions.append( RecordTypeSuggestionModel( record_type=record_type, - user_count=endorsement_count, + user_count=math.floor(endorsement_count), robo_confidence=0, ) ) diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index 89975a08..a382f0b4 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -1,20 +1,18 @@ -from sqlalchemy import Select, exists, select +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import joinedload from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result +from src.api.endpoints.annotate._shared.queries import helper from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse -from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from src.api.endpoints.annotate.all.get.queries.features.followed_by_any_user import get_followed_by_any_user_feature +from src.api.endpoints.annotate.all.get.queries.features.followed_by_user import get_followed_by_user_feature +from src.api.endpoints.annotate.all.get.queries.helpers import not_exists_user_annotation +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from 
src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion -from src.db.models.views.unvalidated_url import UnvalidatedURL -from src.db.models.views.url_anno_count import URLAnnotationCount -from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import QueryBuilderBase @@ -35,85 +33,64 @@ async def run( self, session: AsyncSession ) -> GetNextURLForAllAnnotationResponse: - query = ( - Select(URL) - # URL Must be unvalidated - .join( - UnvalidatedURL, - UnvalidatedURL.url_id == URL.id - ) - .join( - URLAnnotationFlagsView, - URLAnnotationFlagsView.url_id == URL.id - ) - .join( - URLAnnotationCount, - URLAnnotationCount.url_id == URL.id - ) + base_cte = select( + URL.id, + get_followed_by_user_feature(self.user_id), + get_followed_by_any_user_feature() + ).cte("base") + + query = select( + URL, + base_cte.c.followed_by_user, + base_cte.c.followed_by_any_user, + ).join( + base_cte, + base_cte.c.id == URL.id ) + query = helper.add_joins(query) + + # Add user annotation-specific joins and conditions if self.batch_id is not None: query = query.join(LinkBatchURL).where(LinkBatchURL.batch_id == self.batch_id) if self.url_id is not None: query = query.where(URL.id == self.url_id) + + user_models = [ + AnnotationURLTypeUser, + AnnotationAgencyUser, + AnnotationLocationUser, + AnnotationRecordTypeUser, + ] + query = ( query .where( - 
URL.status == URLStatus.OK.value, # Must not have been previously annotated by user - ~exists( - select(UserURLTypeSuggestion.url_id) - .where( - UserURLTypeSuggestion.url_id == URL.id, - UserURLTypeSuggestion.user_id == self.user_id, - ) - ), - ~exists( - select(UserURLAgencySuggestion.url_id) - .where( - UserURLAgencySuggestion.url_id == URL.id, - UserURLAgencySuggestion.user_id == self.user_id, - ) - ), - ~exists( - select( - UserLocationSuggestion.url_id - ) - .where( - UserLocationSuggestion.url_id == URL.id, - UserLocationSuggestion.user_id == self.user_id, - ) - ), - ~exists( - select( - UserRecordTypeSuggestion.url_id - ) - .where( - UserRecordTypeSuggestion.url_id == URL.id, - UserRecordTypeSuggestion.user_id == self.user_id, - ) - ), - ~exists( - select( - FlagURLSuspended.url_id - ) - .where( - FlagURLSuspended.url_id == URL.id, - ) + *[ + not_exists_user_annotation( + user_id=self.user_id, + user_model=user_model ) + for user_model in user_models + ] ) ) - # Add load options - query = query.options( - joinedload(URL.html_content), - joinedload(URL.user_relevant_suggestions), - joinedload(URL.user_record_type_suggestions), - joinedload(URL.name_suggestions), + + + # Conclude query with limit and sorting + query = helper.add_common_where_conditions(query) + query = helper.add_load_options(query) + query = ( + # Sorting Priority + query.order_by( + # If the specific user follows *this* location, privilege it + helper.bool_sort(base_cte.c.followed_by_user), + *helper.common_sorts(base_cte) + ) + # Limit to 1 result + .limit(1) ) - query = query.order_by( - URLAnnotationCount.total_anno_count.desc(), - URL.id.asc() - ).limit(1) raw_results = (await session.execute(query)).unique() url: URL | None = raw_results.scalars().one_or_none() if url is None: diff --git a/src/api/endpoints/annotate/all/get/queries/features/README.md b/src/api/endpoints/annotate/all/get/queries/features/README.md new file mode 100644 index 00000000..e37fe6e5 --- /dev/null +++ 
b/src/api/endpoints/annotate/all/get/queries/features/README.md @@ -0,0 +1 @@ +"Features" in this case refers to EXISTs subqueries which are separately calculated and used for sorting. \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/update_url_status/__init__.py b/src/api/endpoints/annotate/all/get/queries/features/__init__.py similarity index 100% rename from src/core/tasks/scheduled/impl/update_url_status/__init__.py rename to src/api/endpoints/annotate/all/get/queries/features/__init__.py diff --git a/src/api/endpoints/annotate/all/get/queries/features/followed_by_any_user.py b/src/api/endpoints/annotate/all/get/queries/features/followed_by_any_user.py new file mode 100644 index 00000000..e14ddddd --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/features/followed_by_any_user.py @@ -0,0 +1,27 @@ +from sqlalchemy import exists, select, literal, Exists + +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch +from src.db.models.impl.url.core.sqlalchemy import URL + + +def get_followed_by_any_user_feature() -> Exists: + query = ( + exists( + select(literal(1)) + .select_from(LinkBatchURL) + .join( + LinkLocationBatch, + LinkLocationBatch.batch_id == LinkBatchURL.batch_id + ) + .join( + LinkLocationUserFollow, + LinkLocationUserFollow.location_id == LinkLocationBatch.location_id + ) + .where( + URL.id == LinkBatchURL.url_id, + ) + ).label("followed_by_any_user") + ) + return query \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/features/followed_by_user.py b/src/api/endpoints/annotate/all/get/queries/features/followed_by_user.py new file mode 100644 index 00000000..b73d4cd4 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/features/followed_by_user.py @@ -0,0 +1,30 @@ +from sqlalchemy import exists, select, 
literal, Exists + +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch +from src.db.models.impl.url.core.sqlalchemy import URL + + +def get_followed_by_user_feature( + user_id: int +) -> Exists: + query = ( + exists( + select(literal(1)) + .select_from(LinkBatchURL) + .join( + LinkLocationBatch, + LinkLocationBatch.batch_id == LinkBatchURL.batch_id + ) + .join( + LinkLocationUserFollow, + LinkLocationUserFollow.location_id == LinkLocationBatch.location_id + ) + .where( + URL.id == LinkBatchURL.url_id, + LinkLocationUserFollow.user_id == user_id + ) + ).label("followed_by_user") + ) + return query \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/helpers.py b/src/api/endpoints/annotate/all/get/queries/helpers.py new file mode 100644 index 00000000..da112099 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/helpers.py @@ -0,0 +1,26 @@ +from typing import Protocol, TypeVar + +from sqlalchemy import ColumnElement, select, exists + +from src.db.models.impl.url.core.sqlalchemy import URL + + +class UserURLModelProtocol( + Protocol, +): + user_id: ColumnElement[int] + url_id: ColumnElement[int] + +UserModel = TypeVar("UserModel", bound=UserURLModelProtocol) + +def not_exists_user_annotation( + user_id: int, + user_model: UserModel +) -> ColumnElement[bool]: + return ~exists( + select(user_model.url_id) + .where( + user_model.url_id == URL.id, + user_model.user_id == user_id, + ) + ) \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/location_/core.py b/src/api/endpoints/annotate/all/get/queries/location_/core.py index 6081c5f7..5d03bc55 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/core.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/core.py @@ -15,7 +15,6 @@ def __init__( 
super().__init__() self.url_id = url_id - # TODO: Test async def run(self, session: AsyncSession) -> LocationAnnotationResponseOuterInfo: requester = GetLocationSuggestionsRequester(session) diff --git a/src/api/endpoints/annotate/all/get/queries/location_/requester.py b/src/api/endpoints/annotate/all/get/queries/location_/requester.py index fad8e834..b8ba5410 100644 --- a/src/api/endpoints/annotate/all/get/queries/location_/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/location_/requester.py @@ -6,10 +6,11 @@ from src.api.endpoints.annotate.all.get.queries._shared.sort import sort_suggestions from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.views.location_expanded import LocationExpandedView from src.db.templates.requester import RequesterBase @@ -25,10 +26,13 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: .where( or_( exists_url( - UserLocationSuggestion + AnnotationLocationUser ), exists_url( - AutoLocationIDSubtask + AnnotationLocationAutoSubtask + ), + exists_url( + AnnotationLocationAnon ) ) ) @@ -37,34 +41,48 @@ async def 
get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: # Number of users who suggested each location user_suggestions_cte = ( select( - UserLocationSuggestion.url_id, - UserLocationSuggestion.location_id, - func.count(UserLocationSuggestion.user_id).label('user_count') + AnnotationLocationUser.url_id, + AnnotationLocationUser.location_id, + func.count(AnnotationLocationUser.user_id).label('user_count') ) .group_by( - UserLocationSuggestion.location_id, - UserLocationSuggestion.url_id, + AnnotationLocationUser.location_id, + AnnotationLocationUser.url_id, ) .cte("user_suggestions") ) + # Number of anon users who suggested each location + anon_suggestions_cte = ( + select( + AnnotationLocationAnon.url_id, + AnnotationLocationAnon.location_id, + func.count(AnnotationLocationAnon.session_id).label('anon_count') + ) + .group_by( + AnnotationLocationAnon.location_id, + AnnotationLocationAnon.url_id, + ) + .cte("anon_suggestions") + ) + # Maximum confidence of robo annotation, if any robo_suggestions_cte = ( select( - AutoLocationIDSubtask.url_id, + AnnotationLocationAutoSubtask.url_id, LocationExpandedView.id.label("location_id"), - func.max(LocationIDSubtaskSuggestion.confidence).label('robo_confidence') + func.max(AnnotationLocationAutoSuggestion.confidence).label('robo_confidence') ) .join( LocationExpandedView, - LocationExpandedView.id == LocationIDSubtaskSuggestion.location_id + LocationExpandedView.id == AnnotationLocationAutoSuggestion.location_id ) .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSubtask.id == AnnotationLocationAutoSuggestion.subtask_id ) .group_by( LocationExpandedView.id, - AutoLocationIDSubtask.url_id, + AnnotationLocationAutoSubtask.url_id, ) .cte("robo_suggestions") ) @@ -75,6 +93,7 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: 
LocationExpandedView.full_display_name.label("display_name"), func.coalesce(user_suggestions_cte.c.user_count, 0).label("user_count"), func.coalesce(robo_suggestions_cte.c.robo_confidence, 0).label("robo_confidence"), + func.coalesce(anon_suggestions_cte.c.anon_count, 0).label("anon_count"), ) .join( LocationExpandedView, @@ -87,6 +106,13 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: user_suggestions_cte.c.location_id == LocationExpandedView.id ) ) + .outerjoin( + anon_suggestions_cte, + and_( + anon_suggestions_cte.c.url_id == url_id, + anon_suggestions_cte.c.location_id == LocationExpandedView.id + ) + ) .outerjoin( robo_suggestions_cte, and_( @@ -97,7 +123,8 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: .where( or_( user_suggestions_cte.c.user_count > 0, - robo_suggestions_cte.c.robo_confidence > 0 + robo_suggestions_cte.c.robo_confidence > 0, + anon_suggestions_cte.c.anon_count > 0 ) ) ) @@ -105,7 +132,10 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]: mappings: Sequence[RowMapping] = await self.mappings(joined_suggestions_query) suggestions: list[SuggestionModel] = [ SuggestionModel( - **mapping + id=mapping["id"], + display_name=mapping["display_name"], + user_count=mapping['user_count'] + (mapping['anon_count'] // 2), + robo_confidence=mapping["robo_confidence"] ) for mapping in mappings ] diff --git a/src/api/endpoints/annotate/all/get/queries/name/core.py b/src/api/endpoints/annotate/all/get/queries/name/core.py index 9438f14e..b41ee4fd 100644 --- a/src/api/endpoints/annotate/all/get/queries/name/core.py +++ b/src/api/endpoints/annotate/all/get/queries/name/core.py @@ -5,9 +5,10 @@ from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion, NameAnnotationResponseOuterInfo from src.db.helpers.session import session_helper as sh -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from 
src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.queries.base.builder import QueryBuilderBase @@ -23,30 +24,39 @@ def __init__( async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: query = ( select( - URLNameSuggestion.id.label('id'), - URLNameSuggestion.suggestion.label('display_name'), + AnnotationNameSuggestion.id.label('id'), + AnnotationNameSuggestion.suggestion.label('display_name'), func.count( - LinkUserNameSuggestion.user_id + AnnotationNameUserEndorsement.user_id ).label('user_count'), + func.count( + AnnotationNameAnonEndorsement.session_id + ).label('anon_count'), case( - (URLNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1), + (AnnotationNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1), else_=0 ).label("robo_count") ) .outerjoin( - LinkUserNameSuggestion, - LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id, + AnnotationNameUserEndorsement, + AnnotationNameUserEndorsement.suggestion_id == AnnotationNameSuggestion.id, + ) + .outerjoin( + AnnotationNameAnonEndorsement, + AnnotationNameAnonEndorsement.suggestion_id == AnnotationNameSuggestion.id, ) .where( - URLNameSuggestion.url_id == self.url_id, + AnnotationNameSuggestion.url_id == self.url_id, ) .group_by( - URLNameSuggestion.id, - URLNameSuggestion.suggestion, + AnnotationNameSuggestion.id, + AnnotationNameSuggestion.suggestion, ) .order_by( - func.count(LinkUserNameSuggestion.user_id).desc(), - URLNameSuggestion.id.asc(), + 
(func.count(AnnotationNameUserEndorsement.user_id) + func.count( + AnnotationNameUserEndorsement.user_id + )).desc(), + AnnotationNameSuggestion.id.asc(), ) .limit(3) ) @@ -54,7 +64,10 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo: mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) suggestions = [ NameAnnotationSuggestion( - **mapping + id=mapping["id"], + display_name=mapping["display_name"], + user_count=mapping['user_count'] + (mapping['anon_count'] // 2), + robo_count=mapping["robo_count"] ) for mapping in mappings ] diff --git a/src/api/endpoints/annotate/all/post/requester.py b/src/api/endpoints/annotate/all/post/requester.py index 8834ff76..e0119235 100644 --- a/src/api/endpoints/annotate/all/post/requester.py +++ b/src/api/endpoints/annotate/all/post/requester.py @@ -2,16 +2,16 @@ from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.core.enums import RecordType +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion 
-from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.templates.requester import RequesterBase @@ -33,7 +33,7 @@ def optionally_add_record_type( ) -> None: if rt is None: return - record_type_suggestion = UserRecordTypeSuggestion( + record_type_suggestion = AnnotationRecordTypeUser( url_id=self.url_id, user_id=self.user_id, record_type=rt.value @@ -44,7 +44,7 @@ def add_relevant_annotation( self, url_type: URLType, ) -> None: - relevant_suggestion = UserURLTypeSuggestion( + relevant_suggestion = AnnotationURLTypeUser( url_id=self.url_id, user_id=self.user_id, type=url_type @@ -53,7 +53,7 @@ def add_relevant_annotation( def add_agency_ids(self, agency_ids: list[int]) -> None: for agency_id in agency_ids: - agency_suggestion = UserURLAgencySuggestion( + agency_suggestion = AnnotationAgencyUser( url_id=self.url_id, user_id=self.user_id, agency_id=agency_id, @@ -61,9 +61,9 @@ def add_agency_ids(self, agency_ids: list[int]) -> None: self.session.add(agency_suggestion) def add_location_ids(self, location_ids: list[int]) -> None: - locations: list[UserLocationSuggestion] = [] + locations: list[AnnotationLocationUser] = [] for location_id in location_ids: - locations.append(UserLocationSuggestion( + locations.append(AnnotationLocationUser( url_id=self.url_id, user_id=self.user_id, location_id=location_id @@ -77,20 +77,20 @@ async def optionally_add_name_suggestion( if name_info.empty: return if name_info.existing_name_id is not None: - link = LinkUserNameSuggestion( + link = AnnotationNameUserEndorsement( 
user_id=self.user_id, suggestion_id=name_info.existing_name_id, ) self.session.add(link) return - name_suggestion = URLNameSuggestion( + name_suggestion = AnnotationNameSuggestion( url_id=self.url_id, suggestion=name_info.new_name, source=NameSuggestionSource.USER ) self.session.add(name_suggestion) await self.session.flush() - link = LinkUserNameSuggestion( + link = AnnotationNameUserEndorsement( user_id=self.user_id, suggestion_id=name_suggestion.id, ) diff --git a/src/api/endpoints/annotate/anonymous/get/helpers.py b/src/api/endpoints/annotate/anonymous/get/helpers.py index 83a10845..96a15680 100644 --- a/src/api/endpoints/annotate/anonymous/get/helpers.py +++ b/src/api/endpoints/annotate/anonymous/get/helpers.py @@ -1,12 +1,9 @@ from typing import Protocol, TypeVar from uuid import UUID -from marshmallow.fields import Bool -from sqlalchemy import Exists, select, exists, ColumnElement, Boolean +from sqlalchemy import select, exists, ColumnElement from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.mixins import AnonymousSessionMixin, URLDependentMixin -from src.db.models.templates_.base import Base class AnonymousURLModelProtocol( @@ -17,7 +14,10 @@ class AnonymousURLModelProtocol( AnonModel = TypeVar("AnonModel", bound=AnonymousURLModelProtocol) -def not_exists_anon_annotation(session_id: UUID, anon_model: AnonModel) -> ColumnElement[bool]: +def not_exists_anon_annotation( + session_id: UUID, + anon_model: AnonModel +) -> ColumnElement[bool]: return ~exists( select(anon_model.url_id) .where( diff --git a/src/api/endpoints/annotate/anonymous/get/query.py b/src/api/endpoints/annotate/anonymous/get/query.py index 041d5cda..c53726e1 100644 --- a/src/api/endpoints/annotate/anonymous/get/query.py +++ b/src/api/endpoints/annotate/anonymous/get/query.py @@ -1,25 +1,21 @@ -from typing import Any from uuid import UUID -from sqlalchemy import Select, func, exists, select +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession 
-from sqlalchemy.orm import joinedload from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result +from src.api.endpoints.annotate._shared.queries import helper +from src.api.endpoints.annotate._shared.queries.helper import add_common_where_conditions, add_load_options, \ + common_sorts from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse +from src.api.endpoints.annotate.all.get.queries.features.followed_by_any_user import get_followed_by_any_user_feature from src.api.endpoints.annotate.anonymous.get.helpers import not_exists_anon_annotation from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse -from src.collectors.enums import URLStatus -from src.db.helpers.query import not_exists_url -from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.views.unvalidated_url import UnvalidatedURL -from src.db.models.views.url_anno_count import URLAnnotationCount -from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import 
QueryBuilderBase @@ -33,60 +29,49 @@ def __init__( self.session_id = session_id async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationResponse: + base_cte = select( + URL.id, + get_followed_by_any_user_feature() + ).cte("base") + query = select( + URL, + base_cte.c.followed_by_any_user, + ).join( + base_cte, + base_cte.c.id == URL.id + ) + query = helper.add_joins(query) + + anon_models = [ + AnnotationURLTypeAnon, + AnnotationRecordTypeAnon, + AnnotationLocationAnon, + AnnotationAgencyAnon + ] + + # Add anonymous annotation-specific conditions. query = ( - Select(URL) - # URL Must be unvalidated - .join( - UnvalidatedURL, - UnvalidatedURL.url_id == URL.id - ) - .join( - URLAnnotationFlagsView, - URLAnnotationFlagsView.url_id == URL.id - ) - .join( - URLAnnotationCount, - URLAnnotationCount.url_id == URL.id - ) + query .where( - URL.status == URLStatus.OK.value, # Must not have been previously annotated by user - not_exists_anon_annotation( - session_id=self.session_id, - anon_model=AnonymousAnnotationURLType - ), - not_exists_anon_annotation( - session_id=self.session_id, - anon_model=AnonymousAnnotationRecordType - ), - not_exists_anon_annotation( - session_id=self.session_id, - anon_model=AnonymousAnnotationLocation - ), - not_exists_anon_annotation( - session_id=self.session_id, - anon_model=AnonymousAnnotationAgency - ), - ~exists( - select( - FlagURLSuspended.url_id - ) - .where( - FlagURLSuspended.url_id == URL.id, + *[ + not_exists_anon_annotation( + session_id=self.session_id, + anon_model=anon_model ) - ) - ) - .options( - joinedload(URL.html_content), - joinedload(URL.user_relevant_suggestions), - joinedload(URL.user_record_type_suggestions), - joinedload(URL.name_suggestions), + for anon_model in anon_models + ] ) - .order_by( - URLAnnotationCount.total_anno_count.desc(), - URL.id.asc() + ) + query = add_common_where_conditions(query) + query = add_load_options(query) + query = ( + # Sorting Priority + query.order_by( + 
*common_sorts(base_cte) ) + # Limit to 1 result .limit(1) ) diff --git a/src/api/endpoints/annotate/anonymous/post/query.py b/src/api/endpoints/annotate/anonymous/post/query.py index 593d79d9..a4f0cebf 100644 --- a/src/api/endpoints/annotate/anonymous/post/query.py +++ b/src/api/endpoints/annotate/anonymous/post/query.py @@ -3,10 +3,13 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement from src.db.queries.base.builder import QueryBuilderBase @@ -24,15 +27,37 @@ def __init__( async def run(self, session: AsyncSession) -> None: - url_type_suggestion = AnonymousAnnotationURLType( + url_type_suggestion = AnnotationURLTypeAnon( url_id=self.url_id, url_type=self.post_info.suggested_status, session_id=self.session_id ) session.add(url_type_suggestion) + name_id: int | None + if self.post_info.name_info.new_name is not None: + name_suggestion = 
AnnotationNameSuggestion( + url_id=self.url_id, + suggestion=self.post_info.name_info.new_name, + source=NameSuggestionSource.USER + ) + session.add(name_suggestion) + await session.flush() + name_id = name_suggestion.id + elif self.post_info.name_info.existing_name_id is not None: + name_id = self.post_info.name_info.existing_name_id + else: + name_id = None + + if name_id is not None: + name_suggestion = AnnotationNameAnonEndorsement( + suggestion_id=name_id, + session_id=self.session_id + ) + session.add(name_suggestion) + if self.post_info.record_type is not None: - record_type_suggestion = AnonymousAnnotationRecordType( + record_type_suggestion = AnnotationRecordTypeAnon( url_id=self.url_id, record_type=self.post_info.record_type, session_id=self.session_id @@ -41,7 +66,7 @@ async def run(self, session: AsyncSession) -> None: if len(self.post_info.location_info.location_ids) != 0: location_suggestions = [ - AnonymousAnnotationLocation( + AnnotationLocationAnon( url_id=self.url_id, location_id=location_id, session_id=self.session_id @@ -52,7 +77,7 @@ async def run(self, session: AsyncSession) -> None: if len(self.post_info.agency_info.agency_ids) != 0: agency_suggestions = [ - AnonymousAnnotationAgency( + AnnotationAgencyAnon( url_id=self.url_id, agency_id=agency_id, session_id=self.session_id @@ -61,4 +86,3 @@ async def run(self, session: AsyncSession) -> None: ] session.add_all(agency_suggestions) - # Ignore Name suggestions \ No newline at end of file diff --git a/src/db/models/impl/link/user_name_suggestion/__init__.py b/src/api/endpoints/annotate/migrate/__init__.py similarity index 100% rename from src/db/models/impl/link/user_name_suggestion/__init__.py rename to src/api/endpoints/annotate/migrate/__init__.py diff --git a/src/api/endpoints/annotate/migrate/query.py b/src/api/endpoints/annotate/migrate/query.py new file mode 100644 index 00000000..0fe51b68 --- /dev/null +++ b/src/api/endpoints/annotate/migrate/query.py @@ -0,0 +1,132 @@ +from typing 
import Any +from uuid import UUID + +from sqlalchemy import select, delete +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser +from src.db.queries.base.builder import QueryBuilderBase + + +class MigrateAnonymousAnnotationsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + session_id: UUID, + user_id: int + ): + super().__init__() + self.session_id = session_id + self.user_id = user_id + + async def run(self, session: AsyncSession) -> Any: + await self.migrate_agency_annotations(session) + await self.migrate_location_annotations(session) + await self.migrate_record_type_annotations(session) + await self.migrate_url_type_annotations(session) + await self.migrate_name_annotations(session) + + async def migrate_agency_annotations(self, session: AsyncSession) -> None: + # Copy all agency annotations from anonymous to user. 
+ statement = insert(AnnotationAgencyUser).from_select( + ["agency_id", "url_id", "user_id"], + select( + AnnotationAgencyAnon.agency_id, + AnnotationAgencyAnon.url_id, + self.user_id + ).where( + AnnotationAgencyAnon.session_id == self.session_id + ).distinct() + ).on_conflict_do_nothing(index_elements=["agency_id", "url_id", "user_id"]) + await session.execute(statement) + # Delete all anonymous agency annotations. + statement = delete(AnnotationAgencyAnon).where( + AnnotationAgencyAnon.session_id == self.session_id + ) + await session.execute(statement) + + + async def migrate_location_annotations(self, session: AsyncSession) -> None: + # Copy all location annotations from anonymous to user. + statement = insert(AnnotationLocationUser).from_select( + ['location_id', 'url_id', 'user_id'], + select( + AnnotationLocationAnon.location_id, + AnnotationLocationAnon.url_id, + self.user_id + ).where( + AnnotationLocationAnon.session_id == self.session_id + ).distinct() + ).on_conflict_do_nothing(index_elements=["location_id", "url_id", "user_id"]) + await session.execute(statement) + # Delete all anonymous location annotations. + statement = delete(AnnotationLocationAnon).where( + AnnotationLocationAnon.session_id == self.session_id + ) + await session.execute(statement) + + async def migrate_record_type_annotations(self, session: AsyncSession) -> None: + # Copy all record type annotations from anonymous to user. + statement = insert(AnnotationRecordTypeUser).from_select( + ['record_type', 'url_id', 'user_id'], + select( + AnnotationRecordTypeAnon.record_type, + AnnotationRecordTypeAnon.url_id, + self.user_id + ).where( + AnnotationRecordTypeAnon.session_id == self.session_id + ).distinct() + ).on_conflict_do_nothing(index_elements=["url_id", "user_id"]) + await session.execute(statement) + # Delete all anonymous record type annotations. 
+ statement = delete(AnnotationRecordTypeAnon).where( + AnnotationRecordTypeAnon.session_id == self.session_id + ) + await session.execute(statement) + + async def migrate_url_type_annotations(self, session: AsyncSession) -> None: + # Copy all url type annotations from anonymous to user. + statement = insert(AnnotationURLTypeUser).from_select( + ['type', 'url_id', 'user_id'], + select( + AnnotationURLTypeAnon.url_type, + AnnotationURLTypeAnon.url_id, + self.user_id + ).where( + AnnotationURLTypeAnon.session_id == self.session_id + ).distinct() + ).on_conflict_do_nothing(index_elements=["url_id", "user_id"]) + await session.execute(statement) + # Delete all anonymous url type annotations. + statement = delete(AnnotationURLTypeAnon).where( + AnnotationURLTypeAnon.session_id == self.session_id + ) + await session.execute(statement) + + async def migrate_name_annotations(self, session: AsyncSession) -> None: + # Copy all name annotations from anonymous to user. + statement = insert(AnnotationNameUserEndorsement).from_select( + ['suggestion_id', 'user_id'], + select( + AnnotationNameAnonEndorsement.suggestion_id, + self.user_id + ).where( + AnnotationNameAnonEndorsement.session_id == self.session_id + ).distinct() + ).on_conflict_do_nothing(index_elements=["suggestion_id", "user_id"]) + await session.execute(statement) + # Delete all anonymous name annotations. 
+ statement = delete(AnnotationNameAnonEndorsement).where( + AnnotationNameAnonEndorsement.session_id == self.session_id + ) + await session.execute(statement) diff --git a/src/api/endpoints/annotate/routes.py b/src/api/endpoints/annotate/routes.py index 1633eb5a..0af2afcb 100644 --- a/src/api/endpoints/annotate/routes.py +++ b/src/api/endpoints/annotate/routes.py @@ -12,10 +12,12 @@ from src.api.endpoints.annotate.anonymous.get.query import GetNextURLForAnonymousAnnotationQueryBuilder from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse from src.api.endpoints.annotate.anonymous.post.query import AddAnonymousAnnotationsToURLQueryBuilder +from src.api.endpoints.annotate.migrate.query import MigrateAnonymousAnnotationsQueryBuilder +from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info, get_standard_user_access_info annotate_router = APIRouter( prefix="/annotate", @@ -76,7 +78,7 @@ async def annotate_url_for_all_annotations_and_get_next_url_anonymous( @annotate_router.get("/all") async def get_next_url_for_all_annotations( - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_standard_user_access_info), async_core: AsyncCore = Depends(get_async_core), batch_id: int | None = batch_query, anno_url_id: int | None = url_id_query @@ -92,7 +94,7 @@ async def annotate_url_for_all_annotations_and_get_next_url( url_id: int, all_annotation_post_info: AllAnnotationPostInfo, async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_standard_user_access_info), batch_id: int | None = batch_query, anno_url_id: int 
| None = url_id_query ) -> GetNextURLForAllAnnotationResponse: @@ -113,11 +115,28 @@ async def annotate_url_for_all_annotations_and_get_next_url( url_id=anno_url_id ) +@annotate_router.post('/migrate') +async def migrate_annotations_to_user( + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_standard_user_access_info), + session_id: UUID = Query(description="The session id of the anonymous user") +) -> MessageResponse: + """Migrate annotations from an anonymous session to a user's account.""" + await async_core.adb_client.run_query_builder( + MigrateAnonymousAnnotationsQueryBuilder( + session_id=session_id, + user_id=access_info.user_id + ) + ) + return MessageResponse( + message="Annotations migrated successfully." + ) + @annotate_router.get("/suggestions/agencies/{url_id}") async def get_agency_suggestions( url_id: int, async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), location_id: int | None = Query(default=None) ) -> AgencyAnnotationResponseOuterInfo: return await async_core.adb_client.run_query_builder( @@ -125,4 +144,5 @@ async def get_agency_suggestions( url_id=url_id, location_id=location_id ) - ) \ No newline at end of file + ) + diff --git a/src/api/endpoints/batch/dtos/get/summaries/counts.py b/src/api/endpoints/batch/dtos/get/summaries/counts.py index 0ce4e468..0faaa20b 100644 --- a/src/api/endpoints/batch/dtos/get/summaries/counts.py +++ b/src/api/endpoints/batch/dtos/get/summaries/counts.py @@ -4,7 +4,6 @@ class BatchSummaryURLCounts(BaseModel): total: int pending: int - duplicate: int not_relevant: int submitted: int errored: int diff --git a/src/api/endpoints/batch/routes.py b/src/api/endpoints/batch/routes.py index 87839fb7..81abb7bc 100644 --- a/src/api/endpoints/batch/routes.py +++ b/src/api/endpoints/batch/routes.py @@ -10,9 +10,9 @@ from src.api.endpoints.batch.urls.dto import 
GetURLsByBatchResponse from src.collectors.enums import CollectorType from src.core.core import AsyncCore -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info batch_router = APIRouter( prefix="/batch", @@ -27,7 +27,7 @@ async def get_batch_status( description="Filter by collector type", default=None ), - status: BatchURLStatusEnum | None = Query( + status: BatchURLStatusViewEnum | None = Query( description="Filter by status", default=None ), @@ -36,7 +36,7 @@ async def get_batch_status( default=1 ), core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetBatchSummariesResponse: """ Get the status of recent batches @@ -52,7 +52,7 @@ async def get_batch_status( async def get_batch_info( batch_id: int = Path(description="The batch id"), core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> BatchSummary: return await core.get_batch_info(batch_id) @@ -64,7 +64,7 @@ async def get_urls_by_batch( default=1 ), core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetURLsByBatchResponse: return await core.get_urls_by_batch(batch_id, page=page) @@ -76,7 +76,7 @@ async def get_duplicates_by_batch( default=1 ), core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetDuplicatesByBatchResponse: return await core.get_duplicate_urls_by_batch(batch_id, page=page) @@ -84,7 +84,7 @@ async 
def get_duplicates_by_batch( async def get_batch_logs( batch_id: int = Path(description="The batch id"), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetBatchLogsResponse: """ Retrieve the logs for a recent batch. @@ -96,6 +96,6 @@ async def get_batch_logs( async def abort_batch( batch_id: int = Path(description="The batch id"), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> MessageResponse: return await async_core.abort_batch(batch_id) \ No newline at end of file diff --git a/src/api/endpoints/collector/manual/query.py b/src/api/endpoints/collector/manual/query.py index 5ebe0e4b..8216b10b 100644 --- a/src/api/endpoints/collector/manual/query.py +++ b/src/api/endpoints/collector/manual/query.py @@ -3,7 +3,7 @@ from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO from src.api.endpoints.collector.dtos.manual_batch.response import ManualBatchResponseDTO -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL @@ -53,7 +53,6 @@ async def run(self, session: AsyncSession) -> ManualBatchResponseDTO: name=entry.name, description=entry.description, collector_metadata=entry.collector_metadata, - status=URLStatus.OK.value, source=URLSource.MANUAL, trailing_slash=url_and_scheme.url.endswith('/'), ) diff --git a/src/api/endpoints/collector/routes.py b/src/api/endpoints/collector/routes.py index 4818dc63..0ab89261 100644 --- a/src/api/endpoints/collector/routes.py +++ b/src/api/endpoints/collector/routes.py @@ -10,7 +10,7 @@ from src.collectors.impl.example.dtos.input import ExampleInputDTO from 
src.collectors.enums import CollectorType from src.core.core import AsyncCore -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo from src.collectors.impl.ckan.dtos.input import CKANInputDTO from src.collectors.impl.muckrock.collectors.all_foia.dto import MuckrockAllFOIARequestsCollectorInputDTO @@ -27,7 +27,7 @@ async def start_example_collector( dto: ExampleInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the example collector @@ -42,7 +42,7 @@ async def start_example_collector( async def start_ckan_collector( dto: CKANInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the ckan collector @@ -57,7 +57,7 @@ async def start_ckan_collector( async def start_common_crawler_collector( dto: CommonCrawlerInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the common crawler collector @@ -72,7 +72,7 @@ async def start_common_crawler_collector( async def start_auto_googler_collector( dto: AutoGooglerInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the auto googler collector @@ -87,7 +87,7 @@ async def start_auto_googler_collector( async def start_muckrock_collector( dto: MuckrockSimpleSearchCollectorInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> 
CollectorStartInfo: """ Start the muckrock collector @@ -102,7 +102,7 @@ async def start_muckrock_collector( async def start_muckrock_county_collector( dto: MuckrockCountySearchCollectorInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the muckrock county level collector @@ -117,7 +117,7 @@ async def start_muckrock_county_collector( async def start_muckrock_all_foia_collector( dto: MuckrockAllFOIARequestsCollectorInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> CollectorStartInfo: """ Start the muckrock collector for all FOIA requests @@ -132,7 +132,7 @@ async def start_muckrock_all_foia_collector( async def upload_manual_collector( dto: ManualBatchInputDTO, core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> ManualBatchResponseDTO: """ Uploads a manual "collector" with existing data diff --git a/src/api/endpoints/contributions/routes.py b/src/api/endpoints/contributions/routes.py index c6fdc739..a5032708 100644 --- a/src/api/endpoints/contributions/routes.py +++ b/src/api/endpoints/contributions/routes.py @@ -7,7 +7,7 @@ from src.api.endpoints.contributions.user.response import ContributionsUserResponse from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_standard_user_access_info contributions_router = APIRouter( prefix="/contributions", @@ -17,7 +17,7 @@ @contributions_router.get("/leaderboard") async def get_leaderboard( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = 
Depends(get_standard_user_access_info) ) -> ContributionsLeaderboardResponse: """Returns the leaderboard of user contributions.""" return await core.adb_client.run_query_builder( @@ -27,7 +27,7 @@ async def get_leaderboard( @contributions_router.get("/user") async def get_user_contributions( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_standard_user_access_info) ) -> ContributionsUserResponse: """Get contributions for the user and how often their annotations agreed with the final validation of URLs. diff --git a/src/api/endpoints/contributions/shared/contributions.py b/src/api/endpoints/contributions/shared/contributions.py index ae72fc00..66d7b0be 100644 --- a/src/api/endpoints/contributions/shared/contributions.py +++ b/src/api/endpoints/contributions/shared/contributions.py @@ -1,6 +1,6 @@ from sqlalchemy import select, func, CTE, Column -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser class ContributionsCTEContainer: @@ -8,11 +8,11 @@ class ContributionsCTEContainer: def __init__(self): self._cte = ( select( - UserURLTypeSuggestion.user_id, + AnnotationURLTypeUser.user_id, func.count().label("count") ) .group_by( - UserURLTypeSuggestion.user_id + AnnotationURLTypeUser.user_id ) .cte("contributions") ) diff --git a/src/api/endpoints/contributions/user/queries/agreement/agency.py b/src/api/endpoints/contributions/user/queries/agreement/agency.py index 01000bf2..c1dfeed7 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/agency.py +++ b/src/api/endpoints/contributions/user/queries/agreement/agency.py @@ -1,14 +1,14 @@ from sqlalchemy import select, func, exists, and_, or_, any_, cast, Float from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer +from 
src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion def get_agency_agreement_cte_container() -> AgreementCTEContainer: - uuas = UserURLAgencySuggestion + uuas = AnnotationAgencyUser fuv = FlagURLValidated lau = LinkURLAgency # CTE 1: All validated Meta URLs/Data Sources and their agencies diff --git a/src/api/endpoints/contributions/user/queries/agreement/record_type.py b/src/api/endpoints/contributions/user/queries/agreement/record_type.py index 2cde5ab5..b865cb52 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/record_type.py +++ b/src/api/endpoints/contributions/user/queries/agreement/record_type.py @@ -3,7 +3,7 @@ from src.api.endpoints.contributions.user.queries.annotated_and_validated import AnnotatedAndValidatedCTEContainer from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser def get_record_type_agreement_cte_container( @@ -16,8 +16,8 @@ def get_record_type_agreement_cte_container( func.count() ) .join( - UserRecordTypeSuggestion, - UserRecordTypeSuggestion.url_id == inner_cte.url_id + AnnotationRecordTypeUser, + AnnotationRecordTypeUser.url_id == inner_cte.url_id ) .group_by( inner_cte.user_id @@ -31,14 +31,14 @@ def get_record_type_agreement_cte_container( func.count() ) .join( - UserRecordTypeSuggestion, - UserRecordTypeSuggestion.url_id == inner_cte.url_id + AnnotationRecordTypeUser, + AnnotationRecordTypeUser.url_id == inner_cte.url_id ) .join( URLRecordType, and_( URLRecordType.url_id 
== inner_cte.url_id, - URLRecordType.record_type == UserRecordTypeSuggestion.record_type + URLRecordType.record_type == AnnotationRecordTypeUser.record_type ) ) .group_by( diff --git a/src/api/endpoints/contributions/user/queries/agreement/url_type.py b/src/api/endpoints/contributions/user/queries/agreement/url_type.py index 12feb834..a0ffc2e0 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/url_type.py +++ b/src/api/endpoints/contributions/user/queries/agreement/url_type.py @@ -3,7 +3,7 @@ from src.api.endpoints.contributions.user.queries.annotated_and_validated import AnnotatedAndValidatedCTEContainer from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser def get_url_type_agreement_cte_container( @@ -17,8 +17,8 @@ def get_url_type_agreement_cte_container( func.count() ) .join( - UserURLTypeSuggestion, - UserURLTypeSuggestion.url_id == inner_cte.url_id + AnnotationURLTypeUser, + AnnotationURLTypeUser.url_id == inner_cte.url_id ) .join( FlagURLValidated, @@ -36,14 +36,14 @@ def get_url_type_agreement_cte_container( func.count() ) .join( - UserURLTypeSuggestion, - UserURLTypeSuggestion.url_id == inner_cte.url_id + AnnotationURLTypeUser, + AnnotationURLTypeUser.url_id == inner_cte.url_id ) .join( FlagURLValidated, and_( FlagURLValidated.url_id == inner_cte.url_id, - UserURLTypeSuggestion.type == FlagURLValidated.type + AnnotationURLTypeUser.type == FlagURLValidated.type ) ) diff --git a/src/api/endpoints/contributions/user/queries/annotated_and_validated.py b/src/api/endpoints/contributions/user/queries/annotated_and_validated.py index 9c7c48f6..b617449e 100644 --- a/src/api/endpoints/contributions/user/queries/annotated_and_validated.py +++ 
b/src/api/endpoints/contributions/user/queries/annotated_and_validated.py @@ -1,7 +1,7 @@ from sqlalchemy import select, Column, CTE from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser class AnnotatedAndValidatedCTEContainer: @@ -9,16 +9,16 @@ class AnnotatedAndValidatedCTEContainer: def __init__(self, user_id: int | None): self._cte = ( select( - UserURLTypeSuggestion.user_id, - UserURLTypeSuggestion.url_id + AnnotationURLTypeUser.user_id, + AnnotationURLTypeUser.url_id ) .join( FlagURLValidated, - FlagURLValidated.url_id == UserURLTypeSuggestion.url_id + FlagURLValidated.url_id == AnnotationURLTypeUser.url_id ) ) if user_id is not None: - self._cte = self._cte.where(UserURLTypeSuggestion.user_id == user_id) + self._cte = self._cte.where(AnnotationURLTypeUser.user_id == user_id) self._cte = self._cte.cte("annotated_and_validated") @property diff --git a/src/api/endpoints/data_source/routes.py b/src/api/endpoints/data_source/routes.py index 25787b85..a657ac18 100644 --- a/src/api/endpoints/data_source/routes.py +++ b/src/api/endpoints/data_source/routes.py @@ -13,6 +13,8 @@ from src.api.endpoints.data_source.by_id.put.request import DataSourcePutRequest from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore +from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_admin_access_info data_sources_router = APIRouter( prefix="/data-sources", @@ -45,6 +47,7 @@ async def get_data_source_by_id( async def update_data_source( url_id: int , request: DataSourcePutRequest, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await check_is_data_source_url(url_id=url_id, adb_client=async_core.adb_client) @@ -70,6 +73,7 @@ 
async def get_data_source_agencies( async def add_agency_to_data_source( url_id: int, agency_id: int, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await add_data_source_agency_link( @@ -83,6 +87,7 @@ async def add_agency_to_data_source( async def remove_agency_from_data_source( url_id: int, agency_id: int, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await delete_data_source_agency_link( diff --git a/src/api/endpoints/locations/routes.py b/src/api/endpoints/locations/routes.py index 4a0ef096..c86f66b5 100644 --- a/src/api/endpoints/locations/routes.py +++ b/src/api/endpoints/locations/routes.py @@ -5,6 +5,8 @@ from src.api.endpoints.locations.post.request import AddLocationRequestModel from src.api.endpoints.locations.post.response import AddLocationResponseModel from src.core.core import AsyncCore +from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_admin_access_info location_url_router = APIRouter( prefix="/locations", @@ -15,6 +17,7 @@ @location_url_router.post("") async def create_location( request: AddLocationRequestModel, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> AddLocationResponseModel: return await async_core.adb_client.run_query_builder( diff --git a/src/api/endpoints/meta_url/routes.py b/src/api/endpoints/meta_url/routes.py index 82a36756..790fd519 100644 --- a/src/api/endpoints/meta_url/routes.py +++ b/src/api/endpoints/meta_url/routes.py @@ -12,6 +12,8 @@ from src.api.endpoints.meta_url.by_id.put.request import UpdateMetaURLRequest from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore +from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_admin_access_info meta_urls_router = APIRouter( 
prefix="/meta-urls", @@ -35,6 +37,7 @@ async def get_meta_urls( async def update_meta_url( url_id: int, request: UpdateMetaURLRequest, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await check_is_meta_url(url_id=url_id, adb_client=async_core.adb_client) @@ -61,6 +64,7 @@ async def get_meta_url_agencies( async def add_agency_to_meta_url( url_id: int, agency_id: int, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await add_meta_url_agency_link( @@ -74,6 +78,7 @@ async def add_agency_to_meta_url( async def remove_agency_from_meta_url( url_id: int, agency_id: int, + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> MessageResponse: await delete_meta_url_agency_link( diff --git a/src/api/endpoints/metrics/batches/aggregated/query/core.py b/src/api/endpoints/metrics/batches/aggregated/query/core.py index cc6259de..07015c1d 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/core.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/core.py @@ -1,29 +1,15 @@ -from sqlalchemy import case, select +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.sql.functions import coalesce, func +from sqlalchemy.sql.functions import func from src.api.endpoints.metrics.batches.aggregated.dto import GetMetricsBatchesAggregatedResponseDTO, \ GetMetricsBatchesAggregatedInnerResponseDTO -from src.api.endpoints.metrics.batches.aggregated.query.all_urls.query import CountAllURLsByBatchStrategyQueryBuilder -from src.api.endpoints.metrics.batches.aggregated.query.batch_status_.query import \ - BatchStatusByBatchStrategyQueryBuilder from src.api.endpoints.metrics.batches.aggregated.query.requester_.requester import \ GetBatchesAggregatedMetricsQueryRequester -from 
src.api.endpoints.metrics.batches.aggregated.query.submitted_.query import \ - CountSubmittedByBatchStrategyQueryBuilder -from src.api.endpoints.metrics.batches.aggregated.query.url_error.query import URLErrorByBatchStrategyQueryBuilder -from src.api.endpoints.metrics.batches.aggregated.query.validated_.query import \ - ValidatedURLCountByBatchStrategyQueryBuilder -from src.collectors.enums import URLStatus, CollectorType +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import StatementComposer class GetBatchesAggregatedMetricsQueryBuilder(QueryBuilderBase): diff --git a/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py b/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py index a7b9e27a..6712c76d 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/url_error/query.py @@ -4,7 +4,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse -from src.collectors.enums import URLStatus from src.db.helpers.query import exists_url from src.db.helpers.session import session_helper as sh from src.db.models.impl.batch.sqlalchemy import Batch @@ -28,7 +27,7 @@ async def run(self, session: AsyncSession) -> list[CountByBatchStrategyResponse] .where( exists_url(URLTaskError) ) - .group_by(Batch.strategy, URL.status) + 
.group_by(Batch.strategy) ) mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) diff --git a/src/api/endpoints/metrics/batches/breakdown/query.py b/src/api/endpoints/metrics/batches/breakdown/query.py index 5847e309..d46a01b9 100644 --- a/src/api/endpoints/metrics/batches/breakdown/query.py +++ b/src/api/endpoints/metrics/batches/breakdown/query.py @@ -1,4 +1,4 @@ -from sqlalchemy import select, case, Column +from sqlalchemy import select, Column from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.sql.functions import coalesce @@ -11,11 +11,9 @@ from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE from src.api.endpoints.metrics.batches.breakdown.total.cte_ import TOTAL_CTE from src.api.endpoints.metrics.batches.breakdown.validated.cte_ import VALIDATED_CTE -from src.collectors.enums import URLStatus, CollectorType +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase from src.db.statement_composer import StatementComposer diff --git a/src/api/endpoints/metrics/dtos/get/urls/aggregated/core.py b/src/api/endpoints/metrics/dtos/get/urls/aggregated/core.py index 7dbbc48a..1c8ba860 100644 --- a/src/api/endpoints/metrics/dtos/get/urls/aggregated/core.py +++ b/src/api/endpoints/metrics/dtos/get/urls/aggregated/core.py @@ -4,7 +4,7 @@ from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum class GetMetricsURLValidatedOldestPendingURL(BaseModel): url_id: int diff --git a/src/api/endpoints/metrics/routes.py b/src/api/endpoints/metrics/routes.py 
index 59fa5906..06c09de3 100644 --- a/src/api/endpoints/metrics/routes.py +++ b/src/api/endpoints/metrics/routes.py @@ -10,7 +10,7 @@ from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseDTO from src.api.endpoints.metrics.dtos.get.urls.breakdown.submitted import GetMetricsURLsBreakdownSubmittedResponseDTO from src.core.core import AsyncCore -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo metrics_router = APIRouter( @@ -22,14 +22,14 @@ @metrics_router.get("/batches/aggregated") async def get_batches_aggregated_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsBatchesAggregatedResponseDTO: return await core.get_batches_aggregated_metrics() @metrics_router.get("/batches/breakdown") async def get_batches_breakdown_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), page: int = Query( description="The page number", default=1 @@ -40,34 +40,34 @@ async def get_batches_breakdown_metrics( @metrics_router.get("/urls/aggregate") async def get_urls_aggregated_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsURLsAggregatedResponseDTO: return await core.get_urls_aggregated_metrics() @metrics_router.get("/urls/aggregate/pending") async def get_urls_aggregated_pending_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsURLsAggregatedPendingResponseDTO: return await core.get_urls_aggregated_pending_metrics() 
@metrics_router.get("/urls/breakdown/submitted") async def get_urls_breakdown_submitted_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsURLsBreakdownSubmittedResponseDTO: return await core.get_urls_breakdown_submitted_metrics() @metrics_router.get("/urls/breakdown/pending") async def get_urls_breakdown_pending_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsURLsBreakdownPendingResponseDTO: return await core.get_urls_breakdown_pending_metrics() @metrics_router.get("/backlog") async def get_backlog_metrics( core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetMetricsBacklogResponseDTO: return await core.get_backlog_metrics() \ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/aggregated/query/core.py b/src/api/endpoints/metrics/urls/aggregated/query/core.py index c6dbc29f..880c7e3b 100644 --- a/src/api/endpoints/metrics/urls/aggregated/query/core.py +++ b/src/api/endpoints/metrics/urls/aggregated/query/core.py @@ -11,7 +11,7 @@ from src.core.enums import RecordType from src.db.helpers.session import session_helper as sh from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/oldest_pending_url.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/oldest_pending_url.py index e086b752..f8a8f571 100644 --- a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/oldest_pending_url.py +++ 
b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/oldest_pending_url.py @@ -3,11 +3,10 @@ from src.api.endpoints.metrics.dtos.get.urls.aggregated.core import GetMetricsURLValidatedOldestPendingURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.views.url_status.core import URLStatusMatView -from src.db.models.views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase -from src.db.helpers.session import session_helper as sh class GetOldestPendingURLQueryBuilder(QueryBuilderBase): @@ -18,14 +17,14 @@ async def run( query = ( select( - URLStatusMatView.url_id, + URLStatusMaterializedView.url_id, URL.created_at ) .join( URL, - URLStatusMatView.url_id == URL.id + URLStatusMaterializedView.url_id == URL.id ).where( - URLStatusMatView.status.not_in( + URLStatusMaterializedView.status.not_in( [ URLStatusViewEnum.SUBMITTED.value, URLStatusViewEnum.ACCEPTED.value, diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/status.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/status.py index 05813ce0..6f369b32 100644 --- a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/status.py +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/status.py @@ -4,8 +4,8 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.db.helpers.session import session_helper as sh -from src.db.models.views.url_status.core import URLStatusMatView -from src.db.models.views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase @@ -18,13 +18,13 @@ async def run( query = ( select( 
- URLStatusMatView.status, + URLStatusMaterializedView.status, func.count( - URLStatusMatView.url_id + URLStatusMaterializedView.url_id ).label("count") ) .group_by( - URLStatusMatView.status + URLStatusMaterializedView.status ) ) diff --git a/src/api/endpoints/metrics/urls/breakdown/query/core.py b/src/api/endpoints/metrics/urls/breakdown/query/core.py index bccc7d68..df521497 100644 --- a/src/api/endpoints/metrics/urls/breakdown/query/core.py +++ b/src/api/endpoints/metrics/urls/breakdown/query/core.py @@ -1,16 +1,13 @@ -from typing import Any - from sqlalchemy import select, case, literal, func from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseInnerDTO, \ GetMetricsURLsBreakdownPendingResponseDTO -from src.collectors.enums import URLStatus +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion from src.db.queries.base.builder import QueryBuilderBase @@ -21,19 +18,19 @@ async def run(self, session: AsyncSession) -> GetMetricsURLsBreakdownPendingResp flags = ( select( URL.id.label("url_id"), - case((UserRecordTypeSuggestion.url_id != None, literal(True)), else_=literal(False)).label( + case((AnnotationRecordTypeUser.url_id != None, literal(True)), else_=literal(False)).label( "has_user_record_type_annotation" ), - case((UserURLTypeSuggestion.url_id != None, 
literal(True)), else_=literal(False)).label( + case((AnnotationURLTypeUser.url_id != None, literal(True)), else_=literal(False)).label( "has_user_relevant_annotation" ), - case((UserURLAgencySuggestion.url_id != None, literal(True)), else_=literal(False)).label( + case((AnnotationAgencyUser.url_id != None, literal(True)), else_=literal(False)).label( "has_user_agency_annotation" ), ) - .outerjoin(UserRecordTypeSuggestion, URL.id == UserRecordTypeSuggestion.url_id) - .outerjoin(UserURLTypeSuggestion, URL.id == UserURLTypeSuggestion.url_id) - .outerjoin(UserURLAgencySuggestion, URL.id == UserURLAgencySuggestion.url_id) + .outerjoin(AnnotationRecordTypeUser, URL.id == AnnotationRecordTypeUser.url_id) + .outerjoin(AnnotationURLTypeUser, URL.id == AnnotationURLTypeUser.url_id) + .outerjoin(AnnotationAgencyUser, URL.id == AnnotationAgencyUser.url_id) ).cte("flags") month = func.date_trunc('month', URL.created_at) @@ -65,8 +62,7 @@ async def run(self, session: AsyncSession) -> GetMetricsURLsBreakdownPendingResp FlagURLValidated.url_id == URL.id ) .where( - FlagURLValidated.url_id.is_(None), - URL.status == URLStatus.OK + FlagURLValidated.url_id.is_(None) ) .group_by(month) .order_by(month.asc()) diff --git a/src/db/models/impl/url/suggestion/__init__.py b/src/api/endpoints/proposals/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/__init__.py rename to src/api/endpoints/proposals/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/__init__.py b/src/api/endpoints/proposals/agencies/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/agency/__init__.py rename to src/api/endpoints/proposals/agencies/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/subtask/__init__.py b/src/api/endpoints/proposals/agencies/by_id/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/agency/subtask/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/__init__.py diff 
--git a/src/db/models/impl/url/suggestion/agency/suggestion/__init__.py b/src/api/endpoints/proposals/agencies/by_id/approve/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/agency/suggestion/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/approve/__init__.py diff --git a/src/api/endpoints/proposals/agencies/by_id/approve/query.py b/src/api/endpoints/proposals/agencies/by_id/approve/query.py new file mode 100644 index 00000000..07dd21ff --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/approve/query.py @@ -0,0 +1,152 @@ +from pydantic import BaseModel +from sqlalchemy import select, func, RowMapping, update +from sqlalchemy.exc import NoResultFound +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.proposals.agencies.by_id.approve.response import ProposalAgencyApproveResponse +from src.db.models.impl.agency.enums import JurisdictionType, AgencyType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.agency_.decision_info import ProposalAgencyDecisionInfo +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.queries.base.builder import QueryBuilderBase + +class _ProposalAgencyIntermediateModel(BaseModel): + proposal_id: int + name: str + agency_type: AgencyType + jurisdiction_type: JurisdictionType | None + proposal_status: ProposalStatus + location_ids: list[int] + +class ProposalAgencyApproveQueryBuilder(QueryBuilderBase): + + def __init__( + self, + proposed_agency_id: int, + deciding_user_id: int + ): + super().__init__() + self.proposed_agency_id = proposed_agency_id + self.deciding_user_id = deciding_user_id + + async def run(self, session: AsyncSession) -> 
ProposalAgencyApproveResponse: + + # Get proposed agency + proposed_agency: _ProposalAgencyIntermediateModel | None = await self._get_proposed_agency(session=session) + if proposed_agency is None: + return ProposalAgencyApproveResponse( + message="Proposed agency not found.", + success=False + ) + + # Confirm proposed agency is pending. Otherwise, fail early + if proposed_agency.proposal_status != ProposalStatus.PENDING: + return ProposalAgencyApproveResponse( + message="Proposed agency is not pending.", + success=False + ) + + await self._add_decision_info(session=session) + + promoted_agency_id: int = await self._add_promoted_agency( + session=session, + proposed_agency=proposed_agency + ) + + await self._add_location_links( + session=session, + promoted_agency_id=promoted_agency_id, + location_ids=proposed_agency.location_ids + ) + + await self._update_proposed_agency_status(session=session) + + return ProposalAgencyApproveResponse( + message="Proposed agency approved.", + success=True, + agency_id=promoted_agency_id + ) + + async def _get_proposed_agency(self, session: AsyncSession) -> _ProposalAgencyIntermediateModel | None: + query = ( + select( + ProposalAgency.id, + ProposalAgency.name, + ProposalAgency.agency_type, + ProposalAgency.jurisdiction_type, + ProposalAgency.proposal_status, + func.array_agg(ProposalLinkAgencyLocation.location_id).label("location_ids") + ) + .outerjoin( + ProposalLinkAgencyLocation, + ProposalLinkAgencyLocation.proposal_agency_id == ProposalAgency.id + ) + .where( + ProposalAgency.id == self.proposed_agency_id + ) + .group_by( + ProposalAgency.id, + ProposalAgency.name, + ProposalAgency.agency_type, + ProposalAgency.jurisdiction_type + ) + ) + try: + mapping: RowMapping | None = await self.sh.mapping(session, query) + except NoResultFound: + return None + return _ProposalAgencyIntermediateModel( + proposal_id=mapping[ProposalAgency.id], + name=mapping[ProposalAgency.name], + agency_type=mapping[ProposalAgency.agency_type], + 
jurisdiction_type=mapping[ProposalAgency.jurisdiction_type], + proposal_status=mapping[ProposalAgency.proposal_status], + location_ids=mapping["location_ids"] if mapping["location_ids"] != [None] else [] + ) + + async def _add_decision_info(self, session: AsyncSession) -> None: + decision_info = ProposalAgencyDecisionInfo( + deciding_user_id=self.deciding_user_id, + proposal_agency_id=self.proposed_agency_id, + ) + session.add(decision_info) + + @staticmethod + async def _add_promoted_agency( + session: AsyncSession, + proposed_agency: _ProposalAgencyIntermediateModel + ) -> int: + agency = Agency( + name=proposed_agency.name, + agency_type=proposed_agency.agency_type, + jurisdiction_type=proposed_agency.jurisdiction_type, + ) + session.add(agency) + await session.flush() + return agency.id + + @staticmethod + async def _add_location_links( + session: AsyncSession, + promoted_agency_id: int, + location_ids: list[int] + ): + links: list[LinkAgencyLocation] = [] + for location_id in location_ids: + link = LinkAgencyLocation( + agency_id=promoted_agency_id, + location_id=location_id + ) + links.append(link) + session.add_all(links) + + async def _update_proposed_agency_status(self, session: AsyncSession) -> None: + query = update(ProposalAgency).where( + ProposalAgency.id == self.proposed_agency_id + ).values( + proposal_status=ProposalStatus.APPROVED + ) + await session.execute(query) diff --git a/src/api/endpoints/proposals/agencies/by_id/approve/response.py b/src/api/endpoints/proposals/agencies/by_id/approve/response.py new file mode 100644 index 00000000..9de62d6c --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/approve/response.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class ProposalAgencyApproveResponse(BaseModel): + message: str + success: bool + agency_id: int | None = None \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/anonymous/agency/__init__.py 
b/src/api/endpoints/proposals/agencies/by_id/locations/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/agency/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/locations/__init__.py diff --git a/src/db/models/impl/url/suggestion/anonymous/location/__init__.py b/src/api/endpoints/proposals/agencies/by_id/locations/delete/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/location/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/locations/delete/__init__.py diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/delete/query.py b/src/api/endpoints/proposals/agencies/by_id/locations/delete/query.py new file mode 100644 index 00000000..1ce236cb --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/locations/delete/query.py @@ -0,0 +1,30 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.queries.base.builder import QueryBuilderBase + + +class DeleteProposalAgencyLocationQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + location_id: int, + ): + super().__init__() + self.agency_id = agency_id + self.location_id = location_id + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(ProposalLinkAgencyLocation) + .where( + (ProposalLinkAgencyLocation.proposal_agency_id == self.agency_id) + & (ProposalLinkAgencyLocation.location_id == self.location_id) + ) + ) + + await session.execute(statement) + diff --git a/src/db/models/impl/url/suggestion/anonymous/record_type/__init__.py b/src/api/endpoints/proposals/agencies/by_id/locations/get/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/record_type/__init__.py rename to 
src/api/endpoints/proposals/agencies/by_id/locations/get/__init__.py diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/get/query.py b/src/api/endpoints/proposals/agencies/by_id/locations/get/query.py new file mode 100644 index 00000000..bc45f8ba --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/locations/get/query.py @@ -0,0 +1,41 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.api.endpoints.proposals.agencies.by_id.locations.get.response import ProposalAgencyGetLocationsOuterResponse +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.models.views.location_expanded import LocationExpandedView +from src.db.queries.base.builder import QueryBuilderBase + + +class GetProposalAgencyLocationsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + ): + super().__init__() + self.agency_id = agency_id + + async def run(self, session: AsyncSession) -> ProposalAgencyGetLocationsOuterResponse: + query = ( + select( + ProposalLinkAgencyLocation.location_id, + LocationExpandedView.full_display_name + ) + .where( + ProposalLinkAgencyLocation.proposal_agency_id == self.agency_id + ) + .join( + LocationExpandedView, + LocationExpandedView.id == ProposalLinkAgencyLocation.location_id + ) + ) + + result: Sequence[RowMapping] = await self.sh.mappings(session, query=query) + return ProposalAgencyGetLocationsOuterResponse( + results=[AgencyGetLocationsResponse(**row) for row in result] + ) \ No newline at end of file diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/get/response.py b/src/api/endpoints/proposals/agencies/by_id/locations/get/response.py new file mode 100644 index 00000000..f6175e6d --- 
/dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/locations/get/response.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse + + +class ProposalAgencyGetLocationsOuterResponse(BaseModel): + results: list[AgencyGetLocationsResponse] \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/anonymous/session/__init__.py b/src/api/endpoints/proposals/agencies/by_id/locations/post/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/session/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/locations/post/__init__.py diff --git a/src/api/endpoints/proposals/agencies/by_id/locations/post/query.py b/src/api/endpoints/proposals/agencies/by_id/locations/post/query.py new file mode 100644 index 00000000..439482e5 --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/locations/post/query.py @@ -0,0 +1,23 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.queries.base.builder import QueryBuilderBase + + +class AddProposalAgencyLocationQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + location_id: int + ): + super().__init__() + self.agency_id = agency_id + self.location_id = location_id + + async def run(self, session: AsyncSession) -> None: + lal = ProposalLinkAgencyLocation( + proposal_agency_id=self.agency_id, + location_id=self.location_id, + ) + session.add(lal) \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/anonymous/url_type/__init__.py b/src/api/endpoints/proposals/agencies/by_id/put/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/anonymous/url_type/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/put/__init__.py diff --git a/src/api/endpoints/proposals/agencies/by_id/put/query.py 
b/src/api/endpoints/proposals/agencies/by_id/put/query.py new file mode 100644 index 00000000..996cd909 --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/put/query.py @@ -0,0 +1,45 @@ +from fastapi import HTTPException +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.proposals.agencies.by_id.put.request import ProposalAgencyPutRequest +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.queries.base.builder import QueryBuilderBase + + +class UpdateProposalAgencyQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + request: ProposalAgencyPutRequest, + ): + super().__init__() + self.agency_id = agency_id + self.request = request + + async def run(self, session: AsyncSession) -> None: + + query = ( + select( + ProposalAgency + ) + .where( + ProposalAgency.id == self.agency_id + ) + ) + + agency: ProposalAgency | None = await self.sh.one_or_none(session, query=query) + if not agency: + raise HTTPException(status_code=400, detail="Proposed Agency not found") + + if self.request.name is not None: + agency.name = self.request.name + if self.request.type is not None: + agency.agency_type = self.request.type + if self.request.jurisdiction_type is not None: + agency.jurisdiction_type = self.request.jurisdiction_type + + + + diff --git a/src/api/endpoints/proposals/agencies/by_id/put/request.py b/src/api/endpoints/proposals/agencies/by_id/put/request.py new file mode 100644 index 00000000..4f49f17e --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/put/request.py @@ -0,0 +1,10 @@ +from src.api.shared.models.request_base import RequestBase +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType + + +class ProposalAgencyPutRequest(RequestBase): + name: str | None = None + type: AgencyType | None = None + jurisdiction_type: JurisdictionType | None = None + + diff --git a/src/db/models/impl/url/suggestion/location/__init__.py 
b/src/api/endpoints/proposals/agencies/by_id/reject/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/__init__.py rename to src/api/endpoints/proposals/agencies/by_id/reject/__init__.py diff --git a/src/api/endpoints/proposals/agencies/by_id/reject/query.py b/src/api/endpoints/proposals/agencies/by_id/reject/query.py new file mode 100644 index 00000000..e7038b4f --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/reject/query.py @@ -0,0 +1,83 @@ +from pydantic import BaseModel +from sqlalchemy import select, RowMapping, update +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.proposals.agencies.by_id.reject.request import ProposalAgencyRejectRequestModel +from src.api.endpoints.proposals.agencies.by_id.reject.response import ProposalAgencyRejectResponse +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.agency_.decision_info import ProposalAgencyDecisionInfo +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.queries.base.builder import QueryBuilderBase + +class _ProposalAgencyIntermediateModel(BaseModel): + proposal_id: int + proposal_status: ProposalStatus + + +class ProposalAgencyRejectQueryBuilder(QueryBuilderBase): + + def __init__( + self, + deciding_user_id: int, + proposed_agency_id: int, + request_model: ProposalAgencyRejectRequestModel + ): + super().__init__() + self.deciding_user_id = deciding_user_id + self.proposed_agency_id = proposed_agency_id + self.rejection_reason = request_model.rejection_reason + + async def run(self, session: AsyncSession) -> ProposalAgencyRejectResponse: + # Get proposed agency + proposed_agency: _ProposalAgencyIntermediateModel | None = await self._get_proposed_agency(session=session) + if proposed_agency is None: + return ProposalAgencyRejectResponse( + message="Proposed agency not found.", + success=False + ) + + # Confirm proposed agency is pending. 
Otherwise, fail early + if proposed_agency.proposal_status != ProposalStatus.PENDING: + return ProposalAgencyRejectResponse( + message="Proposed agency is not pending.", + success=False + ) + + await self._add_decision_info(session=session) + await self._update_proposed_agency_status(session=session) + + return ProposalAgencyRejectResponse( + message="Proposed agency rejected.", + success=True + ) + + async def _get_proposed_agency(self, session: AsyncSession) -> _ProposalAgencyIntermediateModel | None: + query = ( + select( + ProposalAgency.id.label("proposal_id"), + ProposalAgency.proposal_status + ) + .where( + ProposalAgency.id == self.proposed_agency_id + ) + ) + mapping: RowMapping | None = await self.sh.mapping(session, query) + if mapping is None: + return None + return _ProposalAgencyIntermediateModel(**mapping) + + async def _add_decision_info(self, session: AsyncSession) -> None: + decision_info = ProposalAgencyDecisionInfo( + proposal_agency_id=self.proposed_agency_id, + rejection_reason=self.rejection_reason, + deciding_user_id=self.deciding_user_id + ) + session.add(decision_info) + + async def _update_proposed_agency_status(self, session: AsyncSession) -> None: + query = update(ProposalAgency).where( + ProposalAgency.id == self.proposed_agency_id + ).values( + proposal_status=ProposalStatus.REJECTED + ) + await session.execute(query) diff --git a/src/api/endpoints/proposals/agencies/by_id/reject/request.py b/src/api/endpoints/proposals/agencies/by_id/reject/request.py new file mode 100644 index 00000000..8c3b1d1c --- /dev/null +++ b/src/api/endpoints/proposals/agencies/by_id/reject/request.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class ProposalAgencyRejectRequestModel(BaseModel): + rejection_reason: str \ No newline at end of file diff --git a/src/api/endpoints/proposals/agencies/by_id/reject/response.py b/src/api/endpoints/proposals/agencies/by_id/reject/response.py new file mode 100644 index 00000000..af85550b --- /dev/null +++ 
b/src/api/endpoints/proposals/agencies/by_id/reject/response.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class ProposalAgencyRejectResponse(BaseModel): + success: bool + message: str \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/location/auto/__init__.py b/src/api/endpoints/proposals/agencies/root/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/__init__.py rename to src/api/endpoints/proposals/agencies/root/__init__.py diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/__init__.py b/src/api/endpoints/proposals/agencies/root/get/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/subtask/__init__.py rename to src/api/endpoints/proposals/agencies/root/get/__init__.py diff --git a/src/api/endpoints/proposals/agencies/root/get/query.py b/src/api/endpoints/proposals/agencies/root/get/query.py new file mode 100644 index 00000000..6f4df84d --- /dev/null +++ b/src/api/endpoints/proposals/agencies/root/get/query.py @@ -0,0 +1,56 @@ +from typing import Sequence + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import joinedload + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.api.endpoints.proposals.agencies.root.get.response import ProposalAgencyGetOuterResponse, ProposalAgencyGetResponse +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.queries.base.builder import QueryBuilderBase + + +class ProposalAgencyGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> ProposalAgencyGetOuterResponse: + query = ( + select( + ProposalAgency + ).where( + ProposalAgency.proposal_status == ProposalStatus.PENDING + ).options( + joinedload(ProposalAgency.locations) + ) + ) + proposal_agencies: 
Sequence[ProposalAgency] = ( + await session.execute(query) + ).unique().scalars().all() + if len(proposal_agencies) == 0: + return ProposalAgencyGetOuterResponse( + results=[] + ) + responses: list[ProposalAgencyGetResponse] = [] + for proposal_agency in proposal_agencies: + locations: list[AgencyGetLocationsResponse] = [] + for location in proposal_agency.locations: + location = AgencyGetLocationsResponse( + location_id=location.id, + full_display_name=location.full_display_name, + ) + locations.append(location) + + response = ProposalAgencyGetResponse( + id=proposal_agency.id, + name=proposal_agency.name, + proposing_user_id=proposal_agency.proposing_user_id, + agency_type=proposal_agency.agency_type, + jurisdiction_type=proposal_agency.jurisdiction_type, + created_at=proposal_agency.created_at, + locations=locations + ) + responses.append(response) + + return ProposalAgencyGetOuterResponse( + results=responses + ) diff --git a/src/api/endpoints/proposals/agencies/root/get/response.py b/src/api/endpoints/proposals/agencies/root/get/response.py new file mode 100644 index 00000000..e5a365c1 --- /dev/null +++ b/src/api/endpoints/proposals/agencies/root/get/response.py @@ -0,0 +1,18 @@ +from datetime import datetime + +from pydantic import BaseModel + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType + +class ProposalAgencyGetResponse(BaseModel): + id: int + name: str + proposing_user_id: int | None + agency_type: AgencyType + jurisdiction_type: JurisdictionType + locations: list[AgencyGetLocationsResponse] + created_at: datetime + +class ProposalAgencyGetOuterResponse(BaseModel): + results: list[ProposalAgencyGetResponse] \ No newline at end of file diff --git a/src/api/endpoints/proposals/routes.py b/src/api/endpoints/proposals/routes.py new file mode 100644 index 00000000..9259a341 --- /dev/null +++ b/src/api/endpoints/proposals/routes.py @@ 
-0,0 +1,118 @@ +from fastapi import APIRouter, Depends, Path + +from src.api.dependencies import get_async_core +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.api.endpoints.proposals.agencies.by_id.approve.query import ProposalAgencyApproveQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.approve.response import ProposalAgencyApproveResponse +from src.api.endpoints.proposals.agencies.by_id.locations.delete.query import DeleteProposalAgencyLocationQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.locations.get.query import GetProposalAgencyLocationsQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.locations.get.response import ProposalAgencyGetLocationsOuterResponse +from src.api.endpoints.proposals.agencies.by_id.locations.post.query import AddProposalAgencyLocationQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.put.query import UpdateProposalAgencyQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.put.request import ProposalAgencyPutRequest +from src.api.endpoints.proposals.agencies.root.get.query import ProposalAgencyGetQueryBuilder +from src.api.endpoints.proposals.agencies.root.get.response import ProposalAgencyGetOuterResponse +from src.api.endpoints.proposals.agencies.by_id.reject.query import ProposalAgencyRejectQueryBuilder +from src.api.endpoints.proposals.agencies.by_id.reject.request import ProposalAgencyRejectRequestModel +from src.api.endpoints.proposals.agencies.by_id.reject.response import ProposalAgencyRejectResponse +from src.api.shared.models.message_response import MessageResponse +from src.core.core import AsyncCore +from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_admin_access_info + +proposal_router = APIRouter(prefix="/proposal", tags=["Pending"]) + +@proposal_router.get("/agencies") +async def get_pending_agencies( + async_core: AsyncCore = Depends(get_async_core), + access_info: 
AccessInfo = Depends(get_admin_access_info), +) -> ProposalAgencyGetOuterResponse: + return await async_core.adb_client.run_query_builder( + ProposalAgencyGetQueryBuilder(), + ) + +@proposal_router.post("/agencies/{proposed_agency_id}/approve") +async def approve_proposed_agency( + async_core: AsyncCore = Depends(get_async_core), + proposed_agency_id: int = Path( + description="Proposed agency ID to approve" + ), + access_info: AccessInfo = Depends(get_admin_access_info), +) -> ProposalAgencyApproveResponse: + return await async_core.adb_client.run_query_builder( + ProposalAgencyApproveQueryBuilder( + proposed_agency_id=proposed_agency_id, + deciding_user_id=access_info.user_id, + ) + ) + +@proposal_router.post("/agencies/{proposed_agency_id}/reject") +async def reject_proposed_agency( + request: ProposalAgencyRejectRequestModel, + async_core: AsyncCore = Depends(get_async_core), + proposed_agency_id: int = Path( + description="Proposed agency ID to reject" + ), + access_info: AccessInfo = Depends(get_admin_access_info), +) -> ProposalAgencyRejectResponse: + return await async_core.adb_client.run_query_builder( + ProposalAgencyRejectQueryBuilder( + proposed_agency_id=proposed_agency_id, + deciding_user_id=access_info.user_id, + request_model=request, + ) + ) + +@proposal_router.get("/agencies/{proposed_agency_id}/locations") +async def get_agency_locations( + proposed_agency_id: int = Path( + description="Agency ID to get locations for" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> ProposalAgencyGetLocationsOuterResponse: + return await async_core.adb_client.run_query_builder( + GetProposalAgencyLocationsQueryBuilder(agency_id=proposed_agency_id) + ) + +@proposal_router.post("/agencies/{proposed_agency_id}/locations/{location_id}") +async def add_location_to_agency( + proposed_agency_id: int = Path( + description="Agency ID to add location to" + ), + location_id: int = Path( + description="Location ID to add" + ), + async_core: AsyncCore = 
Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + AddProposalAgencyLocationQueryBuilder(agency_id=proposed_agency_id, location_id=location_id) + ) + return MessageResponse(message="Location added to agency.") + +@proposal_router.delete("/agencies/{proposed_agency_id}/locations/{location_id}") +async def remove_location_from_agency( + proposed_agency_id: int = Path( + description="Agency ID to remove location from" + ), + location_id: int = Path( + description="Location ID to remove" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + DeleteProposalAgencyLocationQueryBuilder(agency_id=proposed_agency_id, location_id=location_id) + ) + return MessageResponse(message="Location removed from agency.") + +@proposal_router.put("/agencies/{proposed_agency_id}") +async def update_agency( + request: ProposalAgencyPutRequest, + proposed_agency_id: int = Path( + description="Agency ID to update" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + UpdateProposalAgencyQueryBuilder(agency_id=proposed_agency_id, request=request) + ) + return MessageResponse(message="Proposed agency updated.") diff --git a/src/api/endpoints/review/reject/query.py b/src/api/endpoints/review/reject/query.py index 1f9dfe91..ed444bfb 100644 --- a/src/api/endpoints/review/reject/query.py +++ b/src/api/endpoints/review/reject/query.py @@ -4,7 +4,6 @@ from starlette.status import HTTP_400_BAD_REQUEST from src.api.endpoints.review.enums import RejectionReason -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL diff --git a/src/api/endpoints/root.py b/src/api/endpoints/root.py index 03b05ed4..044c0a5f 100644 --- 
a/src/api/endpoints/root.py +++ b/src/api/endpoints/root.py @@ -1,6 +1,6 @@ from fastapi import APIRouter, Query, Depends -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo root_router = APIRouter(prefix="", tags=["Root"]) @@ -8,7 +8,7 @@ @root_router.get("/") async def root( test: str = Query(description="A test parameter"), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> dict[str, str]: """ A simple root endpoint for testing and pinging diff --git a/src/api/endpoints/search/routes.py b/src/api/endpoints/search/routes.py index 58b661e8..aa3c730b 100644 --- a/src/api/endpoints/search/routes.py +++ b/src/api/endpoints/search/routes.py @@ -8,7 +8,7 @@ from src.api.endpoints.search.dtos.response import SearchURLResponse from src.core.core import AsyncCore from src.db.models.impl.agency.enums import JurisdictionType -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo search_router = APIRouter(prefix="/search", tags=["Search"]) @@ -17,7 +17,7 @@ @search_router.get("/url") async def search_url( url: str = Query(description="The URL to search for"), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> SearchURLResponse: """ @@ -44,7 +44,7 @@ async def search_agency( description="The page to search for", default=1 ), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> list[AgencySearchResponse]: if query is None and location_id is None and jurisdiction_type is None: diff --git a/src/db/models/impl/url/suggestion/location/auto/suggestion/__init__.py 
b/src/api/endpoints/submit/agency/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/suggestion/__init__.py rename to src/api/endpoints/submit/agency/__init__.py diff --git a/src/api/endpoints/submit/agency/enums.py b/src/api/endpoints/submit/agency/enums.py new file mode 100644 index 00000000..95e160df --- /dev/null +++ b/src/api/endpoints/submit/agency/enums.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class AgencyProposalRequestStatus(Enum): + SUCCESS = "SUCCESS" + PROPOSAL_DUPLICATE = "PROPOSAL_DUPLICATE" + ACCEPTED_DUPLICATE = "ACCEPTED_DUPLICATE" + ERROR = "ERROR" diff --git a/src/api/endpoints/submit/agency/helpers.py b/src/api/endpoints/submit/agency/helpers.py new file mode 100644 index 00000000..12abc550 --- /dev/null +++ b/src/api/endpoints/submit/agency/helpers.py @@ -0,0 +1,106 @@ +from sqlalchemy import func, select +from sqlalchemy.dialects.postgresql import aggregate_order_by + +from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.impl.proposals.agency_.core import ProposalAgency +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation + + +def norm_name(col): + # POSTGRES: lower(regexp_replace(trim(name), '\s+', ' ', 'g')) + return func.lower( + func.regexp_replace(func.trim(col), r"\s+", " ", "g") + ) + +def exact_duplicates_for_approved_agency_query( + request: SubmitAgencyRequestModel, +): + link = LinkAgencyLocation + agencies = Agency + + agency_locations_cte = ( + select( + link.agency_id, + # Postgres ARRAY_AGG with deterministic ordering + func.array_agg( + aggregate_order_by( + link.location_id, + link.location_id.asc() + ) + ).label("location_ids") + ) + .group_by( + link.agency_id, + ) + .cte("agency_locations") + ) + + query = ( + select( + agencies.id, + ) + .join( + 
agency_locations_cte, + agency_locations_cte.c.agency_id == agencies.id + ) + .where( + norm_name(agencies.name) == request.name.lower().strip(), + agencies.jurisdiction_type == request.jurisdiction_type, + agencies.agency_type == request.agency_type, + agency_locations_cte.c.location_ids == sorted(request.location_ids), + ) + .group_by( + agencies.id, + ) + ) + + return query + + +def exact_duplicates_for_proposal_agency_query( + request: SubmitAgencyRequestModel, +): + link = ProposalLinkAgencyLocation + agencies = ProposalAgency + + agency_locations_cte = ( + select( + link.proposal_agency_id, + # Postgres ARRAY_AGG with deterministic ordering + func.array_agg( + aggregate_order_by( + link.location_id, + link.location_id.asc() + ) + ).label("location_ids") + ) + .group_by( + link.proposal_agency_id, + ) + .cte("agency_locations") + ) + + query = ( + select( + agencies.id, + ) + .join( + agency_locations_cte, + agency_locations_cte.c.proposal_agency_id == agencies.id + ) + .where( + norm_name(agencies.name) == request.name.lower().strip(), + agencies.jurisdiction_type == request.jurisdiction_type, + agencies.agency_type == request.agency_type, + agency_locations_cte.c.location_ids == sorted(request.location_ids), + ) + .group_by( + agencies.id, + ) + ) + + return query + + diff --git a/src/api/endpoints/submit/agency/query.py b/src/api/endpoints/submit/agency/query.py new file mode 100644 index 00000000..a59f5f12 --- /dev/null +++ b/src/api/endpoints/submit/agency/query.py @@ -0,0 +1,88 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.submit.agency.enums import AgencyProposalRequestStatus +from src.api.endpoints.submit.agency.helpers import \ + exact_duplicates_for_proposal_agency_query, exact_duplicates_for_approved_agency_query +from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel +from src.api.endpoints.submit.agency.response import SubmitAgencyProposalResponse +from src.db.models.impl.proposals.agency_.core 
import ProposalAgency +from src.db.models.impl.proposals.agency_.link__location import ProposalLinkAgencyLocation +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.queries.base.builder import QueryBuilderBase + + +class SubmitAgencyProposalQueryBuilder(QueryBuilderBase): + + def __init__(self, request: SubmitAgencyRequestModel, user_id: int): + super().__init__() + self.request = request + self.user_id = user_id + + async def run(self, session: AsyncSession) -> SubmitAgencyProposalResponse: + + # Check that an agency with the same name AND location IDs does not exist + # as an approved agency + if await self._approved_agency_exists(session): + return SubmitAgencyProposalResponse( + status=AgencyProposalRequestStatus.ACCEPTED_DUPLICATE, + details="An agency with the same properties is already approved." + ) + + # Check that an agency with the same name AND location IDs does not exist + # as a proposed agency + if await self._proposed_agency_exists(session): + return SubmitAgencyProposalResponse( + status=AgencyProposalRequestStatus.PROPOSAL_DUPLICATE, + details="An agency with the same properties is already in the proposal queue." + ) + + # Add proposed agency and get proposal ID + proposal_id: int = await self._add_proposed_agency(session) + + # Add proposed agency locations + await self._add_proposed_agency_locations( + session=session, + proposal_id=proposal_id, + location_ids=self.request.location_ids + ) + + # Return response + + return SubmitAgencyProposalResponse( + proposal_id=proposal_id, + status=AgencyProposalRequestStatus.SUCCESS, + details="Successfully added proposed agency." 
+ ) + + async def _approved_agency_exists(self, session: AsyncSession) -> bool: + query = exact_duplicates_for_approved_agency_query(self.request) + return await self.sh.results_exist(session, query=query) + + async def _proposed_agency_exists(self, session: AsyncSession) -> bool: + query = exact_duplicates_for_proposal_agency_query(self.request) + return await self.sh.results_exist(session, query=query) + + async def _add_proposed_agency(self, session: AsyncSession) -> int: + proposal = ProposalAgency( + name=self.request.name, + jurisdiction_type=self.request.jurisdiction_type, + agency_type=self.request.agency_type, + proposing_user_id=self.user_id, + proposal_status=ProposalStatus.PENDING, + ) + session.add(proposal) + await session.flush() + return proposal.id + + async def _add_proposed_agency_locations( + self, + session: AsyncSession, + location_ids: list[int], + proposal_id: int + ) -> None: + for location_id in location_ids: + link = ProposalLinkAgencyLocation( + proposal_agency_id=proposal_id, + location_id=location_id + ) + session.add(link) diff --git a/src/api/endpoints/submit/agency/request.py b/src/api/endpoints/submit/agency/request.py new file mode 100644 index 00000000..8fef866a --- /dev/null +++ b/src/api/endpoints/submit/agency/request.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType + + +class SubmitAgencyRequestModel(BaseModel): + name: str + agency_type: AgencyType + jurisdiction_type: JurisdictionType + + location_ids: list[int] \ No newline at end of file diff --git a/src/api/endpoints/submit/agency/response.py b/src/api/endpoints/submit/agency/response.py new file mode 100644 index 00000000..886713a5 --- /dev/null +++ b/src/api/endpoints/submit/agency/response.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + +from src.api.endpoints.submit.agency.enums import AgencyProposalRequestStatus + + +class SubmitAgencyProposalResponse(BaseModel): + proposal_id: int | 
None = None + status: AgencyProposalRequestStatus + details: str | None \ No newline at end of file diff --git a/src/api/endpoints/submit/data_source/models/response/duplicate.py b/src/api/endpoints/submit/data_source/models/response/duplicate.py index 12367372..f1414b8f 100644 --- a/src/api/endpoints/submit/data_source/models/response/duplicate.py +++ b/src/api/endpoints/submit/data_source/models/response/duplicate.py @@ -1,11 +1,11 @@ from pydantic import BaseModel -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum class SubmitDataSourceURLDuplicateSubmissionResponse(BaseModel): message: str url_id: int url_type: URLType | None - url_status: URLStatus \ No newline at end of file + url_status: URLStatusViewEnum \ No newline at end of file diff --git a/src/api/endpoints/submit/data_source/queries/core.py b/src/api/endpoints/submit/data_source/queries/core.py index 1f97cd11..aec2e821 100644 --- a/src/api/endpoints/submit/data_source/queries/core.py +++ b/src/api/endpoints/submit/data_source/queries/core.py @@ -1,25 +1,22 @@ import uuid -from typing import Any -from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.submit.data_source.models.response.standard import SubmitDataSourceURLProposalResponse from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest -from src.collectors.enums import URLStatus from src.core.enums import BatchStatus +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from 
src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder from src.util.models.full_url import FullURL @@ -44,7 +41,6 @@ async def run( trailing_slash=full_url.has_trailing_slash, name=self.request.name, description=self.request.description, - status=URLStatus.OK, source=URLSource.MANUAL, ) @@ -75,7 +71,7 @@ async def run( session_id: uuid.UUID = await MakeAnonymousSessionQueryBuilder().run(session=session) # Add URL Type Suggestion - url_type_suggestion = AnonymousAnnotationURLType( + url_type_suggestion = AnnotationURLTypeAnon( url_id=url_id, url_type=URLType.DATA_SOURCE, session_id=session_id @@ -84,7 +80,7 @@ async def run( # Optionally add Record Type as suggestion if self.request.record_type is not None: - record_type_suggestion = 
AnonymousAnnotationRecordType( + record_type_suggestion = AnnotationRecordTypeAnon( url_id=url_id, record_type=self.request.record_type.value, session_id=session_id @@ -94,7 +90,7 @@ async def run( # Optionally add Agency ID suggestions if self.request.agency_ids is not None: agency_id_suggestions = [ - AnonymousAnnotationAgency( + AnnotationAgencyAnon( url_id=url_id, agency_id=agency_id, session_id=session_id @@ -106,7 +102,7 @@ async def run( # Optionally add Location ID suggestions if self.request.location_ids is not None: location_id_suggestions = [ - AnonymousAnnotationLocation( + AnnotationLocationAnon( url_id=url_id, location_id=location_id, session_id=session_id @@ -117,7 +113,7 @@ async def run( # Optionally add name suggestion if self.request.name is not None: - name_suggestion = URLNameSuggestion( + name_suggestion = AnnotationNameSuggestion( url_id=url_id, suggestion=self.request.name, source=NameSuggestionSource.USER diff --git a/src/api/endpoints/submit/data_source/queries/duplicate.py b/src/api/endpoints/submit/data_source/queries/duplicate.py index 75346cf6..d4409e91 100644 --- a/src/api/endpoints/submit/data_source/queries/duplicate.py +++ b/src/api/endpoints/submit/data_source/queries/duplicate.py @@ -8,6 +8,7 @@ SubmitDataSourceURLDuplicateSubmissionResponse from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.queries.base.builder import QueryBuilderBase @@ -29,13 +30,17 @@ async def run(self, session: AsyncSession) -> None: query = ( select( URL.id, - URL.status, + URLStatusMaterializedView.status, FlagURLValidated.type ) .outerjoin( FlagURLValidated, FlagURLValidated.url_id == URL.id ) + .outerjoin( + URLStatusMaterializedView, + URLStatusMaterializedView.url_id == URL.id + ) .where( URL.url == self.url ) @@ -48,7 +53,7 @@ async def run(self, session: 
AsyncSession) -> None: model = SubmitDataSourceURLDuplicateSubmissionResponse( message="Duplicate URL found", url_id=mapping[URL.id], - url_status=mapping[URL.status], + url_status=mapping[URLStatusMaterializedView.status], url_type=mapping[FlagURLValidated.type] ) raise HTTPException( diff --git a/src/api/endpoints/submit/routes.py b/src/api/endpoints/submit/routes.py index 2eb46c15..b7e2344c 100644 --- a/src/api/endpoints/submit/routes.py +++ b/src/api/endpoints/submit/routes.py @@ -1,11 +1,12 @@ from fastapi import APIRouter, Depends from src.api.dependencies import get_async_core - +from src.api.endpoints.submit.agency.query import SubmitAgencyProposalQueryBuilder +from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel +from src.api.endpoints.submit.agency.response import SubmitAgencyProposalResponse from src.api.endpoints.submit.data_source.models.response.duplicate import \ SubmitDataSourceURLDuplicateSubmissionResponse from src.api.endpoints.submit.data_source.models.response.standard import SubmitDataSourceURLProposalResponse -from src.api.endpoints.submit.data_source.queries.core import SubmitDataSourceURLProposalQueryBuilder from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest from src.api.endpoints.submit.data_source.wrapper import submit_data_source_url_proposal from src.api.endpoints.submit.url.models.request import URLSubmissionRequest @@ -13,7 +14,7 @@ from src.api.endpoints.submit.url.queries.core import SubmitURLQueryBuilder from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info, get_standard_user_access_info submit_router = APIRouter(prefix="/submit", tags=["Submit"]) @@ -22,7 +23,7 @@ ) async def submit_url( request: URLSubmissionRequest, - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = 
Depends(get_standard_user_access_info), async_core: AsyncCore = Depends(get_async_core), ) -> URLSubmissionResponse: return await async_core.adb_client.run_query_builder( @@ -49,3 +50,16 @@ async def submit_data_source( request=request, adb_client=async_core.adb_client ) + +@submit_router.post("/agency") +async def submit_agency( + request: SubmitAgencyRequestModel, + async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_standard_user_access_info) +) -> SubmitAgencyProposalResponse: + return await async_core.adb_client.run_query_builder( + SubmitAgencyProposalQueryBuilder( + request=request, + user_id=access_info.user_id + ) + ) diff --git a/src/api/endpoints/submit/url/queries/core.py b/src/api/endpoints/submit/url/queries/core.py index 0d2c1c84..49e56a98 100644 --- a/src/api/endpoints/submit/url/queries/core.py +++ b/src/api/endpoints/submit/url/queries/core.py @@ -7,16 +7,15 @@ from src.api.endpoints.submit.url.queries.convert import convert_invalid_url_to_url_response, \ convert_duplicate_urls_to_url_response from src.api.endpoints.submit.url.queries.dedupe import DeduplicateURLQueryBuilder -from src.collectors.enums import URLStatus -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.enums import URLSource 
from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.queries.base.builder import QueryBuilderBase from src.util.models.url_and_scheme import URLAndScheme from src.util.url import clean_url, get_url_and_scheme, is_valid_url @@ -61,7 +60,6 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: url=url_and_scheme.url, scheme=url_and_scheme.scheme, source=URLSource.MANUAL, - status=URLStatus.OK, description=self.request.description, trailing_slash=url_and_scheme.url.endswith('/'), ) @@ -77,7 +75,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add record type as suggestion if exists if self.request.record_type is not None: - rec_sugg = UserRecordTypeSuggestion( + rec_sugg = AnnotationRecordTypeUser( user_id=self.user_id, url_id=url_insert.id, record_type=self.request.record_type.value @@ -86,7 +84,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add name as suggestion if exists if self.request.name is not None: - name_sugg = URLNameSuggestion( + name_sugg = AnnotationNameSuggestion( url_id=url_insert.id, suggestion=self.request.name, source=NameSuggestionSource.USER @@ -94,7 +92,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: session.add(name_sugg) await session.flush() - link_name_sugg = LinkUserNameSuggestion( + link_name_sugg = AnnotationNameUserEndorsement( suggestion_id=name_sugg.id, user_id=self.user_id ) @@ -104,7 +102,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add location ID as suggestion if exists 
if self.request.location_id is not None: - loc_sugg = UserLocationSuggestion( + loc_sugg = AnnotationLocationUser( user_id=self.user_id, url_id=url_insert.id, location_id=self.request.location_id @@ -113,7 +111,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add agency ID as suggestion if exists if self.request.agency_id is not None: - agen_sugg = UserURLAgencySuggestion( + agen_sugg = AnnotationAgencyUser( user_id=self.user_id, url_id=url_insert.id, agency_id=self.request.agency_id diff --git a/src/api/endpoints/task/by_id/query.py b/src/api/endpoints/task/by_id/query.py index 92487327..f1ea5adb 100644 --- a/src/api/endpoints/task/by_id/query.py +++ b/src/api/endpoints/task/by_id/query.py @@ -1,9 +1,8 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload, joinedload +from sqlalchemy.orm import selectinload from src.api.endpoints.task.by_id.dto import TaskInfo -from src.collectors.enums import URLStatus from src.db.enums import TaskType from src.db.models.impl.task.core import Task from src.db.models.impl.task.enums import TaskStatus @@ -35,6 +34,7 @@ async def run(self, session: AsyncSession) -> TaskInfo: error = task.errors[0].error if len(task.errors) > 0 else None # Get error info if any # Get URLs + # TODO: Revise to include URL Status from URL Web metadata urls = task.urls url_infos = [] for url in urls: @@ -43,7 +43,6 @@ async def run(self, session: AsyncSession) -> TaskInfo: batch_id=url.batch.id, url=url.url, collector_metadata=url.collector_metadata, - status=URLStatus(url.status), updated_at=url.updated_at ) url_infos.append(url_info) diff --git a/src/api/endpoints/task/routes.py b/src/api/endpoints/task/routes.py index 23f52999..3bb039b7 100644 --- a/src/api/endpoints/task/routes.py +++ b/src/api/endpoints/task/routes.py @@ -9,7 +9,7 @@ from src.db.enums import TaskType from src.core.core import AsyncCore from src.core.enums import BatchStatus -from 
src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo task_router = APIRouter( @@ -34,7 +34,7 @@ async def get_tasks( default=None ), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetTasksResponse: return await async_core.get_tasks( page=page, @@ -45,7 +45,7 @@ async def get_tasks( @task_router.get("/status") async def get_task_status( async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> GetTaskStatusResponseInfo: return await async_core.get_current_task_status() @@ -53,7 +53,7 @@ async def get_task_status( async def get_task_info( task_id: int = Path(description="The task id"), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info) + access_info: AccessInfo = Depends(get_admin_access_info) ) -> TaskInfo: return await async_core.get_task_info(task_id) diff --git a/src/api/endpoints/url/get/dto.py b/src/api/endpoints/url/get/dto.py index a4616d7e..0e10c6e9 100644 --- a/src/api/endpoints/url/get/dto.py +++ b/src/api/endpoints/url/get/dto.py @@ -1,10 +1,9 @@ import datetime -from typing import Optional from pydantic import BaseModel -from src.collectors.enums import URLStatus from src.db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource, TaskType +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum class GetURLsResponseErrorInfo(BaseModel): @@ -25,7 +24,7 @@ class GetURLsResponseInnerInfo(BaseModel): id: int batch_id: int | None url: str - status: URLStatus + status: URLStatusViewEnum | None collector_metadata: dict | None updated_at: datetime.datetime created_at: datetime.datetime diff --git a/src/api/endpoints/url/get/query.py 
b/src/api/endpoints/url/get/query.py index 6885ef64..d9ba9047 100644 --- a/src/api/endpoints/url/get/query.py +++ b/src/api/endpoints/url/get/query.py @@ -1,12 +1,16 @@ -from sqlalchemy import select, exists +from typing import Sequence + +from sqlalchemy import select, exists, RowMapping, func +from sqlalchemy.dialects.postgresql import aggregate_order_by from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload from src.api.endpoints.url.get.dto import GetURLsResponseInfo, GetURLsResponseErrorInfo, GetURLsResponseInnerInfo -from src.collectors.enums import URLStatus from src.db.client.helpers import add_standard_limit_and_offset +from src.db.models.impl import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.queries.base.builder import QueryBuilderBase @@ -22,39 +26,86 @@ def __init__( self.errors = errors async def run(self, session: AsyncSession) -> GetURLsResponseInfo: - statement = select(URL).options( - selectinload(URL.task_errors), - selectinload(URL.batch) - ).order_by(URL.id) + + error_cte = ( + select( + URLTaskError.url_id, + func.array_agg( + aggregate_order_by( + func.jsonb_build_object( + "task_type", URLTaskError.task_type, + "error", URLTaskError.error, + "created_at", URLTaskError.created_at + ), + URLTaskError.created_at, + ) + ).label("error_array") + ) + .group_by( + URLTaskError.url_id + ) + .cte("errors") + ) + + + query = ( + select( + URL.id, + LinkBatchURL.batch_id, + URL.full_url, + URL.collector_metadata, + URLStatusMaterializedView.status, + URL.created_at, + URL.updated_at, + URL.name, + error_cte.c.error_array + ) + .outerjoin( + LinkBatchURL + ) + .outerjoin( + URLStatusMaterializedView, + URLStatusMaterializedView.url_id == URL.id + ) + 
.outerjoin( + error_cte, + error_cte.c.url_id == URL.id + ) + .outerjoin( + URLScrapeInfo + ) + .order_by(URL.id) + ) if self.errors: # Only return URLs with errors - statement = statement.where( + query = query.where( exists( select(URLTaskError).where(URLTaskError.url_id == URL.id) ) ) - add_standard_limit_and_offset(statement, self.page) - execute_result = await session.execute(statement) - all_results = execute_result.scalars().all() + add_standard_limit_and_offset(query, self.page) + mappings: Sequence[RowMapping] = await self.sh.mappings(session, query) + final_results = [] - for result in all_results: + for mapping in mappings: error_results = [] - for error in result.task_errors: + error_array = mapping["error_array"] or [] + for error in error_array: error_result = GetURLsResponseErrorInfo( - task=error.task_type, - error=error.error, - updated_at=error.created_at + task=error["task_type"], + error=error["error"], + updated_at=error["created_at"] ) error_results.append(error_result) final_results.append( GetURLsResponseInnerInfo( - id=result.id, - batch_id=result.batch.id if result.batch is not None else None, - url=result.full_url, - status=URLStatus(result.status), - collector_metadata=result.collector_metadata, - updated_at=result.updated_at, - created_at=result.created_at, + id=mapping[URL.id], + batch_id=mapping[LinkBatchURL.batch_id], + url=mapping["full_url"], + collector_metadata=mapping[URL.collector_metadata], + status=mapping[URLStatusMaterializedView.status], + created_at=mapping[URL.created_at], + updated_at=mapping[URL.updated_at], errors=error_results, ) ) diff --git a/src/api/endpoints/url/routes.py b/src/api/endpoints/url/routes.py index 7d184e6e..77a0a749 100644 --- a/src/api/endpoints/url/routes.py +++ b/src/api/endpoints/url/routes.py @@ -6,7 +6,7 @@ from src.api.endpoints.url.get.dto import GetURLsResponseInfo from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore -from 
src.security.manager import get_access_info +from src.security.manager import get_admin_access_info from src.security.dtos.access_info import AccessInfo url_router = APIRouter( @@ -26,7 +26,7 @@ async def get_urls( default=False ), async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> GetURLsResponseInfo: result = await async_core.get_urls(page=page, errors=errors) return result @@ -50,6 +50,7 @@ async def get_url_screenshot( async def delete_url( url_id: int, async_core: AsyncCore = Depends(get_async_core), + access_info: AccessInfo = Depends(get_admin_access_info), ) -> MessageResponse: await async_core.adb_client.run_query_builder( DeleteURLQueryBuilder(url_id=url_id) diff --git a/src/api/main.py b/src/api/main.py index 87fa0d3a..a62e6fdf 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -19,6 +19,7 @@ from src.api.endpoints.locations.routes import location_url_router from src.api.endpoints.meta_url.routes import meta_urls_router from src.api.endpoints.metrics.routes import metrics_router +from src.api.endpoints.proposals.routes import proposal_router from src.api.endpoints.root import root_router from src.api.endpoints.search.routes import search_router from src.api.endpoints.submit.routes import submit_router @@ -199,7 +200,8 @@ async def redirect_docs(): data_sources_router, meta_urls_router, check_router, - location_url_router + location_url_router, + proposal_router ] for router in routers: diff --git a/src/collectors/enums.py b/src/collectors/enums.py index 16711a0c..2e5f6239 100644 --- a/src/collectors/enums.py +++ b/src/collectors/enums.py @@ -9,9 +9,3 @@ class CollectorType(Enum): MUCKROCK_ALL_SEARCH = "muckrock_all_search" CKAN = "ckan" MANUAL = "manual" - -class URLStatus(Enum): - OK = "ok" - ERROR = "error" - DUPLICATE = "duplicate" - BROKEN = "broken" diff --git a/src/collectors/queries/insert/url.py 
b/src/collectors/queries/insert/url.py index 60f39a2c..3b21d210 100644 --- a/src/collectors/queries/insert/url.py +++ b/src/collectors/queries/insert/url.py @@ -22,7 +22,6 @@ async def run(self, session: AsyncSession) -> int: url=url_and_scheme.url.rstrip('/'), scheme=url_and_scheme.scheme, collector_metadata=self.url_info.collector_metadata, - status=self.url_info.status.value, source=self.url_info.source, trailing_slash=url_and_scheme.url.endswith('/'), ) diff --git a/src/core/core.py b/src/core/core.py index ad2f20d5..cbee2d84 100644 --- a/src/core/core.py +++ b/src/core/core.py @@ -31,7 +31,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType from src.db.models.impl.batch.pydantic.info import BatchInfo -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum class AsyncCore: @@ -81,7 +81,7 @@ async def get_duplicate_urls_by_batch(self, batch_id: int, page: int = 1) -> Get async def get_batch_statuses( self, collector_type: CollectorType | None, - status: BatchURLStatusEnum | None, + status: BatchURLStatusViewEnum | None, page: int ) -> GetBatchSummariesResponse: results = await self.adb_client.get_batch_summaries( diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py index 0621ee52..ebef8b45 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/mappings.py @@ -1,4 +1,3 @@ -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/prereq/requester.py b/src/core/tasks/scheduled/impl/huggingface/queries/prereq/requester.py index 1eaa306d..3abadbf5 100644 --- 
a/src/core/tasks/scheduled/impl/huggingface/queries/prereq/requester.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/prereq/requester.py @@ -1,19 +1,11 @@ from datetime import datetime -from operator import or_ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.sql.functions import count -from src.collectors.enums import URLStatus from src.core.tasks.scheduled.impl.huggingface.queries.cte import HuggingfacePrereqCTEContainer -from src.db.enums import TaskType -from src.db.helpers.query import not_exists_url, no_url_task_error, exists_url from src.db.helpers.session import session_helper as sh -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.state.huggingface import HuggingFaceUploadState -from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.core.sqlalchemy import URL class CheckValidURLsUpdatedRequester: diff --git a/src/db/models/impl/url/suggestion/location/user/__init__.py b/src/core/tasks/scheduled/impl/sync_from_ds/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/user/__init__.py rename to src/core/tasks/scheduled/impl/sync_from_ds/__init__.py diff --git a/src/db/models/impl/url/suggestion/name/__init__.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/name/__init__.py rename to src/core/tasks/scheduled/impl/sync_from_ds/impl/__init__.py diff --git a/src/db/models/impl/url/suggestion/record_type/__init__.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/record_type/__init__.py rename to src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/__init__.py diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/core.py 
b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/core.py new file mode 100644 index 00000000..c26f2525 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/core.py @@ -0,0 +1,56 @@ +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.models.user_location_pairs import UserLocationPairs +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.query import UpdateFollowsInDBQueryBuilder +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.types import UserID, LocationID +from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase +from src.db.client.async_ import AsyncDatabaseClient +from src.db.enums import TaskType +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.follows.core import GetFollowsRequestBuilder +from src.external.pdap.impl.sync.follows.response import SyncFollowGetInnerResponse + + +class DSAppSyncUserFollowsGetTaskOperator(ScheduledTaskOperatorBase): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + pdap_client: PDAPClient + ): + super().__init__(adb_client) + self.pdap_client = pdap_client + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_USER_FOLLOWS_GET + + async def inner_task_logic(self) -> None: + responses = await self._get_follows_from_ds() + await self._update_follows_in_db(responses) + + async def _get_follows_from_ds(self) -> list[SyncFollowGetInnerResponse]: + return await self.pdap_client.run_request_builder( + GetFollowsRequestBuilder() + ) + + async def _update_follows_in_db(self, responses: list[SyncFollowGetInnerResponse]) -> None: + # Get response tuples + api_pairs: list[UserLocationPairs] = [ + UserLocationPairs( + user_id=UserID(response.user_id), + location_id=LocationID(response.location_id) + ) + for response in responses + ] + # Run query + await self.adb_client.run_query_builder( + UpdateFollowsInDBQueryBuilder(api_pairs=api_pairs) + ) + # + # async def _get_follows_in_db(self) -> 
list[tuple[int, int]]: + # query = ( + # select( + # LinkLocationUserFollow.user_id, + # LinkLocationUserFollow.location_id + # ) + # ) + # mappings: Sequence[RowMapping] = await self.adb_client.mappings(query) \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/url_type/__init__.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/url_type/__init__.py rename to src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/__init__.py diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/user_location_pairs.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/user_location_pairs.py new file mode 100644 index 00000000..58664fbd --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/models/user_location_pairs.py @@ -0,0 +1,19 @@ +""" + +Design Notes: + - I contemplated having this be a simple tuple, but reasoned it'd be more future-proof + if I used a Pydantic Model, so it would fail loudly in case the API response + structure changes.
+ +""" + +from pydantic import BaseModel + +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.types import LocationID, UserID + +class UserLocationPairs(BaseModel): + user_id: UserID + location_id: LocationID + + class Config: + frozen = True \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/query.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/query.py new file mode 100644 index 00000000..0f78a3da --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/query.py @@ -0,0 +1,74 @@ +from typing import Any, Sequence + +from sqlalchemy import select, RowMapping, delete, tuple_ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.models.user_location_pairs import UserLocationPairs +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.types import UserID, LocationID +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.db.queries.base.builder import QueryBuilderBase + +class UpdateFollowsInDBQueryBuilder(QueryBuilderBase): + + def __init__(self, api_pairs: list[UserLocationPairs]): + super().__init__() + self.api_pairs = api_pairs + + async def run(self, session: AsyncSession) -> Any: + db_pairs: list[UserLocationPairs] = await self.get_db_pairs(session) + api_pairs_set = set(self.api_pairs) + db_pairs_set = set(db_pairs) + # Get all pairs that are in the API but not in the DB + new_pairs = api_pairs_set - db_pairs_set + # Get all pairs that are in the DB but not in the API + removed_pairs = db_pairs_set - api_pairs_set + + await self.add_new_links(session, new_pairs) + await self.remove_links(session, removed_pairs) + + + async def get_db_pairs(self, session: AsyncSession) -> list[UserLocationPairs]: + query = ( + select( + LinkLocationUserFollow.user_id, + LinkLocationUserFollow.location_id + ) + ) + mappings: Sequence[RowMapping] = await self.sh.mappings(session, query=query) 
+ return [ + UserLocationPairs( + user_id=mapping[LinkLocationUserFollow.user_id], + location_id=mapping[LinkLocationUserFollow.location_id] + ) + for mapping in mappings + ] + + async def add_new_links( + self, + session: AsyncSession, + pairs: set[UserLocationPairs] + ) -> None: + for pair in pairs: + link = LinkLocationUserFollow( + user_id=pair.user_id, + location_id=pair.location_id + ) + session.add(link) + + async def remove_links( + self, + session: AsyncSession, + removed_pairs: set[UserLocationPairs] + ) -> None: + tuples: list[tuple[UserID, LocationID]] = [ + (pair.user_id, pair.location_id) + for pair in removed_pairs + ] + statement = delete(LinkLocationUserFollow).where( + tuple_( + LinkLocationUserFollow.user_id, + LinkLocationUserFollow.location_id, + ).in_(tuples) + ) + await session.execute(statement) + diff --git a/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/types.py b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/types.py new file mode 100644 index 00000000..b3dc8e5b --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_from_ds/impl/follows/types.py @@ -0,0 +1,4 @@ +from typing import NewType + +UserID = NewType("UserID", int) +LocationID = NewType("LocationID", int) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py index 04710ba6..487850dd 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py @@ -12,6 +12,8 @@ from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from 
src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel @@ -40,7 +42,7 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: # Required URL.full_url, URL.name, - URL.status, + URLWebMetadata.status_code, URLRecordType.record_type, agency_id_cte.c.agency_ids, # Optional @@ -72,6 +74,10 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: URLOptionalDataSourceMetadata, URL.id == URLOptionalDataSourceMetadata.url_id, ) + .outerjoin( + URLWebMetadata, + URL.id == URLWebMetadata.url_id + ) .outerjoin( URLInternetArchivesProbeMetadata, URL.id == URLInternetArchivesProbeMetadata.url_id, @@ -118,8 +124,9 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url], access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], + # TODO: Change to convert web metadata result to URL Status url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URL.status], + mapping[URLWebMetadata.status_code], ), internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py index 8f0ff65e..b1c21474 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py @@ -18,7 +18,7 @@ def __init__(self): URL, URL.id == DSAppLinkDataSource.url_id, ) - .outerjoin( + .join( URLRecordType, URL.id == URLRecordType.url_id, ) diff --git 
a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py index a710b6f7..8b23f339 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py @@ -12,6 +12,7 @@ from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel @@ -41,7 +42,7 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: # Required URL.full_url, URL.name, - URL.status, + URLWebMetadata.status_code, URLRecordType.record_type, agency_id_cte.c.agency_ids, # Optional @@ -82,6 +83,10 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: URLRecordType, URLRecordType.url_id == URL.id, ) + .outerjoin( + URLWebMetadata, + URLWebMetadata.url_id == URL.id, + ) .outerjoin( agency_id_cte, cte.url_id == agency_id_cte.c.url_id @@ -122,7 +127,7 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], data_portal_type_other=mapping[URLOptionalDataSourceMetadata.data_portal_type_other], url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URL.status], + mapping[URLWebMetadata.status_code], ), internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, ) diff --git 
a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py index 5a784295..02ff8c8f 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py @@ -10,6 +10,7 @@ from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel from src.external.pdap.impl.sync.meta_urls.add.request import AddMetaURLsOuterRequest, AddMetaURLsInnerRequest @@ -36,7 +37,7 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: select( cte.url_id, URL.full_url, - URL.status, + URLWebMetadata.status_code, URLInternetArchivesProbeMetadata.archive_url, agency_id_cte.c.agency_ids ) @@ -47,6 +48,10 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: URL, URL.id == cte.url_id, ) + .join( + URLWebMetadata, + URL.id == URLWebMetadata.url_id, + ) .outerjoin( URLInternetArchivesProbeMetadata, URL.id == URLInternetArchivesProbeMetadata.url_id, @@ -73,7 +78,7 @@ async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: agency_ids=mapping["agency_ids"], internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URL.status], + mapping[URLWebMetadata.status_code], ), ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py index 8cdb8ed6..c73909dc 100644 
--- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py @@ -10,6 +10,7 @@ from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.base.builder import QueryBuilderBase from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest, UpdateMetaURLsInnerRequest @@ -35,7 +36,7 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: select( cte.ds_meta_url_id, URL.full_url, - URL.status, + URLWebMetadata.status_code, agency_id_cte.c.agency_ids, URLInternetArchivesProbeMetadata.archive_url, ) @@ -50,6 +51,10 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: URLInternetArchivesProbeMetadata, URL.id == URLInternetArchivesProbeMetadata.url_id, ) + .outerjoin( + URLWebMetadata, + URL.id == URLWebMetadata.url_id, + ) .outerjoin( agency_id_cte, cte.url_id == agency_id_cte.c.url_id @@ -72,7 +77,7 @@ async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: agency_ids=mapping["agency_ids"] or [], internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, url_status=convert_sm_url_status_to_ds_url_status( - sm_url_status=mapping[URL.status], + mapping[URLWebMetadata.status_code], ), ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py index 3f586b20..3de3e502 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py @@ -1,14 +1,11 @@ -from 
src.collectors.enums import URLStatus from src.external.pdap.enums import DataSourcesURLStatus def convert_sm_url_status_to_ds_url_status( - sm_url_status: URLStatus + status_code: int ) -> DataSourcesURLStatus: - match sm_url_status: - case URLStatus.OK: + match status_code: + case 200: return DataSourcesURLStatus.OK - case URLStatus.BROKEN: - return DataSourcesURLStatus.BROKEN case _: - raise ValueError(f"URL status has no corresponding DS Status: {sm_url_status}") \ No newline at end of file + return DataSourcesURLStatus.BROKEN \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/update_url_status/operator.py b/src/core/tasks/scheduled/impl/update_url_status/operator.py deleted file mode 100644 index 82285996..00000000 --- a/src/core/tasks/scheduled/impl/update_url_status/operator.py +++ /dev/null @@ -1,15 +0,0 @@ -from src.core.tasks.scheduled.impl.update_url_status.query import UpdateURLStatusQueryBuilder -from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase -from src.db.enums import TaskType - - -class UpdateURLStatusOperator(ScheduledTaskOperatorBase): - - @property - def task_type(self) -> TaskType: - return TaskType.UPDATE_URL_STATUS - - async def inner_task_logic(self) -> None: - await self.adb_client.run_query_builder( - UpdateURLStatusQueryBuilder() - ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/update_url_status/query.py b/src/core/tasks/scheduled/impl/update_url_status/query.py deleted file mode 100644 index 963405b6..00000000 --- a/src/core/tasks/scheduled/impl/update_url_status/query.py +++ /dev/null @@ -1,49 +0,0 @@ -from sqlalchemy import update, exists, select -from sqlalchemy.ext.asyncio import AsyncSession - -from src.collectors.enums import URLStatus -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from src.db.queries.base.builder import QueryBuilderBase - - -class 
UpdateURLStatusQueryBuilder(QueryBuilderBase): - - async def run(self, session: AsyncSession) -> None: - - # Update broken URLs to nonbroken if their status is not 404 - query_broken_to_ok = ( - update(URL) - .values( - status=URLStatus.OK - ) - .where( - exists( - select(1).where( - URLWebMetadata.url_id == URL.id, # <-- correlate - URLWebMetadata.status_code != 404, - URL.status == URLStatus.BROKEN - ) - ) - ) - ) - - # Update ok URLs to broken if their status is 404 - query_ok_to_broken = ( - update(URL) - .values( - status=URLStatus.BROKEN - ) - .where( - exists( - select(1).where( - URLWebMetadata.url_id == URL.id, # <-- correlate - URLWebMetadata.status_code == 404, - URL.status == URLStatus.OK - ) - ) - ) - ) - - await session.execute(query_broken_to_ok) - await session.execute(query_ok_to_broken) \ No newline at end of file diff --git a/src/core/tasks/scheduled/loader.py b/src/core/tasks/scheduled/loader.py index 61169a66..38ebced3 100644 --- a/src/core/tasks/scheduled/loader.py +++ b/src/core/tasks/scheduled/loader.py @@ -12,6 +12,7 @@ from src.core.tasks.scheduled.impl.mark_never_completed.operator import MarkTaskNeverCompletedOperator from src.core.tasks.scheduled.impl.refresh_materialized_views.operator import RefreshMaterializedViewsOperator from src.core.tasks.scheduled.impl.run_url_tasks.operator import RunURLTasksTaskOperator +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.core import DSAppSyncUserFollowsGetTaskOperator from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.core import DSAppSyncAgenciesAddTaskOperator from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.core import DSAppSyncAgenciesDeleteTaskOperator from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.core import DSAppSyncAgenciesUpdateTaskOperator @@ -24,7 +25,6 @@ from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.core import DSAppSyncMetaURLsDeleteTaskOperator from 
src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator from src.core.tasks.scheduled.impl.task_cleanup.operator import TaskCleanupOperator -from src.core.tasks.scheduled.impl.update_url_status.operator import UpdateURLStatusOperator from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.hub.client import HuggingFaceHubClient @@ -136,6 +136,15 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: enabled=self.setup_flag("INTEGRITY_MONITOR_TASK_FLAG") ), # Sync + ## Get + ScheduledTaskEntry( + operator=DSAppSyncUserFollowsGetTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ), + interval_minutes=IntervalEnum.DAILY.value, + enabled=self.setup_flag("DS_APP_SYNC_USER_FOLLOWS_GET_TASK_FLAG") + ), ## Adds ### Agency ScheduledTaskEntry( @@ -220,13 +229,4 @@ async def load_entries(self) -> list[ScheduledTaskEntry]: interval_minutes=IntervalEnum.HOURLY.value, enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG") ), - ### URL - ScheduledTaskEntry( - operator=UpdateURLStatusOperator( - adb_client=self.adb_client - ), - interval_minutes=IntervalEnum.DAILY.value, - enabled=self.setup_flag("UPDATE_URL_STATUS_TASK_FLAG") - ), - ] diff --git a/src/core/tasks/url/manager.py b/src/core/tasks/url/manager.py index 7fc6b4e3..b3da8edd 100644 --- a/src/core/tasks/url/manager.py +++ b/src/core/tasks/url/manager.py @@ -52,9 +52,7 @@ async def _run_task(self, entry: URLTaskEntry) -> None: while meets_prereq: print(f"Running {operator.task_type.value} Task") if count > TASK_REPEAT_THRESHOLD: - message = f"Task {operator.task_type.value} has been run more than {TASK_REPEAT_THRESHOLD} times in a row. Task loop terminated." 
- print(message) - await self.handler.post_to_discord(message=message) + await self._alert_task_repeat_threshold_exceeded(operator) break run_info: TaskOperatorRunInfo = await operator.run_task() await self.conclude_task(run_info) @@ -63,6 +61,11 @@ async def _run_task(self, entry: URLTaskEntry) -> None: count += 1 meets_prereq = await operator.meets_task_prerequisites() + async def _alert_task_repeat_threshold_exceeded(self, operator): + message = f"Task {operator.task_type.value} has been run more than {TASK_REPEAT_THRESHOLD} times in a row. Task loop terminated." + print(message) + await self.handler.post_to_discord(message=message) + async def trigger_task_run(self) -> None: await self.task_trigger.trigger_or_rerun() diff --git a/src/core/tasks/url/operators/agency_identification/core.py b/src/core/tasks/url/operators/agency_identification/core.py index 7657ea0e..536e4fec 100644 --- a/src/core/tasks/url/operators/agency_identification/core.py +++ b/src/core/tasks/url/operators/agency_identification/core.py @@ -9,7 +9,7 @@ from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType class AgencyIdentificationTaskOperator( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/convert.py b/src/core/tasks/url/operators/agency_identification/subtasks/convert.py index 5cead5d3..a7d4735d 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/convert.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/convert.py @@ -1,7 +1,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion 
-from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic def convert_agency_suggestions_to_subtask_data( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py index 41997322..4eaeaeaa 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/flags/core.py @@ -2,7 +2,7 @@ from environs import Env from src.core.tasks.url.operators.agency_identification.subtasks.flags.mappings import SUBTASK_TO_ENV_FLAG -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType class SubtaskFlagger: diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py b/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py index dcc0b60c..cc45b123 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/flags/mappings.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType SUBTASK_TO_ENV_FLAG: dict[AutoAgencyIDSubtaskType, str] = { AutoAgencyIDSubtaskType.HOMEPAGE_MATCH: "AGENCY_ID_HOMEPAGE_MATCH_FLAG", diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/batch_link/core.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/impl/batch_link/core.py index 9e15996f..83d4d11a 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/batch_link/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/batch_link/core.py @@ -6,8 +6,8 @@ from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic class AgencyBatchLinkSubtaskOperator(AgencyIDSubtaskOperatorBase): diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py index 2603191a..275bb3c6 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/ckan_/core.py @@ -13,7 +13,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import \ AgencyIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.external.pdap.client import PDAPClient diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py index f4ba913e..186ed9ca 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/convert.py @@ -2,9 +2,9 @@ GetHomepageMatchParams from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.models.mapping import \ SubtaskURLMapping -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode, AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic -from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode, AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic def convert_params_to_subtask_entries( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py index f335cb3a..d072aa6d 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/core.py @@ -7,8 +7,8 @@ from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.get import \ GetHomepageMatchSubtaskURLsQueryBuilder from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic -from 
src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic class HomepageMatchSubtaskOperator( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py index 6c65f9ad..989e1a7b 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/models/entry.py @@ -1,6 +1,6 @@ from pydantic import BaseModel, Field -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode class GetHomepageMatchParams(BaseModel): diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py index edf9e601..9c1fca04 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/multi_agency_case.py @@ -1,8 +1,8 @@ -from sqlalchemy import CTE, select, literal +from sqlalchemy import select, literal from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.consolidated import \ CONSOLIDATED_CTE -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode 
MULTI_AGENCY_CASE_QUERY = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py index 5778ecb6..31638d31 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/single_agency_case.py @@ -2,7 +2,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.consolidated import \ CONSOLIDATED_CTE -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode SINGLE_AGENCY_CASE_QUERY = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py index 10619531..05f7dd81 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/get.py @@ -10,7 +10,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.impl.homepage_match_.queries.ctes.single_agency_case import \ SINGLE_AGENCY_CASE_QUERY from src.db.helpers.session import session_helper as sh -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import SubtaskDetailCode from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py 
index 030139ad..dd77b94e 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/muckrock_/core.py @@ -16,8 +16,8 @@ from src.core.tasks.url.operators.agency_identification.subtasks.queries.match_agency import MatchAgencyQueryBuilder from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic from src.external.pdap.client import PDAPClient diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/convert.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/convert.py index 2766bff0..a3b8bb0f 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/convert.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/convert.py @@ -2,8 +2,8 @@ NLPLocationMatchSubtaskInput from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from 
src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic def convert_location_agency_mappings_to_subtask_data_list( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query_/query.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query_/query.py index f0dcac94..94eb48aa 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query_/query.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/nlp_location_match_/query_/query.py @@ -8,9 +8,9 @@ NLPLocationMatchSubtaskInput, LocationAnnotationToAgencyIDMapping, LocationAnnotation from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.impl.nlp_location import \ NLP_LOCATION_CONTAINER +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion from src.db.queries.base.builder import QueryBuilderBase from src.db.helpers.session import session_helper as sh @@ -21,21 +21,21 @@ async def run(self, session: AsyncSession) -> list[NLPLocationMatchSubtaskInput] query = ( select( NLP_LOCATION_CONTAINER.url_id, - LocationIDSubtaskSuggestion.location_id, - LocationIDSubtaskSuggestion.confidence, + AnnotationLocationAutoSuggestion.location_id, + AnnotationLocationAutoSuggestion.confidence, LinkAgencyLocation.agency_id, ) .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.url_id == NLP_LOCATION_CONTAINER.url_id + AnnotationLocationAutoSubtask, + 
AnnotationLocationAutoSubtask.url_id == NLP_LOCATION_CONTAINER.url_id ) .join( - LocationIDSubtaskSuggestion, - LocationIDSubtaskSuggestion.subtask_id == AutoLocationIDSubtask.id + AnnotationLocationAutoSuggestion, + AnnotationLocationAutoSuggestion.subtask_id == AnnotationLocationAutoSubtask.id ) .join( LinkAgencyLocation, - LinkAgencyLocation.location_id == LocationIDSubtaskSuggestion.location_id + LinkAgencyLocation.location_id == AnnotationLocationAutoSuggestion.location_id ) .where( ~NLP_LOCATION_CONTAINER.entry_exists diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/loader.py b/src/core/tasks/url/operators/agency_identification/subtasks/loader.py index 24099540..fd14d34e 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/loader.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/loader.py @@ -10,7 +10,7 @@ NLPLocationMatchSubtaskOperator from src.core.tasks.url.operators.agency_identification.subtasks.templates.subtask import AgencyIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.external.pdap.client import PDAPClient diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/models/subtask.py b/src/core/tasks/url/operators/agency_identification/subtasks/models/subtask.py index 7da0a8f5..99f7b2d9 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/models/subtask.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/models/subtask.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from src.core.tasks.url.operators.agency_identification.subtasks.models.suggestion import AgencySuggestion -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from 
src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic class AutoAgencyIDSubtaskData(BaseModel): diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py index bea99266..38a8b44c 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/constants.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType # Determines priority of subtasks, all else being equal. SUBTASK_HIERARCHY: list[AutoAgencyIDSubtaskType] = [ diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py index 2b81d2de..ef90db7f 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/core.py @@ -7,7 +7,7 @@ SUBTASK_HIERARCHY_MAPPING from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.eligible_counts import \ ELIGIBLE_COUNTS_QUERY -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.queries.base.builder import QueryBuilderBase from src.db.helpers.session import session_helper as sh diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py index cfb92327..4c5aaa78 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py @@ -2,24 +2,24 @@ from src.core.tasks.url.operators._shared.container.subtask.exists import \ URLsSubtaskExistsCTEContainer +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion cte = ( select( URL.id ) .join( - URLAutoAgencyIDSubtask, - URLAutoAgencyIDSubtask.url_id == URL.id, + AnnotationAgencyAutoSubtask, + AnnotationAgencyAutoSubtask.url_id == URL.id, ) .join( - AgencyIDSubtaskSuggestion, - AgencyIDSubtaskSuggestion.subtask_id == URLAutoAgencyIDSubtask.id, + AnnotationAgencyAutoSuggestion, + AnnotationAgencyAutoSuggestion.subtask_id == AnnotationAgencyAutoSubtask.id, ) .where( - AgencyIDSubtaskSuggestion.confidence >= 95, + AnnotationAgencyAutoSuggestion.confidence >= 95, ) .cte("high_confidence_annotations_exists") ) diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py index b06442ea..7f4aff78 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py +++ 
b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/helpers.py @@ -1,8 +1,8 @@ from sqlalchemy import ColumnElement, exists +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask def get_exists_subtask_query( @@ -11,8 +11,8 @@ def get_exists_subtask_query( return ( exists() .where( - URLAutoAgencyIDSubtask.url_id == URL.id, - URLAutoAgencyIDSubtask.type == subtask_type, + AnnotationAgencyAutoSubtask.url_id == URL.id, + AnnotationAgencyAutoSubtask.type == subtask_type, ) .label("subtask_entry_exists") ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py index 42fcc02f..167262b8 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py @@ -3,10 +3,10 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.models.impl.link.agency_batch.sqlalchemy import LinkAgencyBatch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from 
src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py index 6b8ed9e8..052a5fb3 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/ckan.py @@ -4,10 +4,10 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py index 7daba916..7cc9a065 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/homepage.py @@ -5,8 +5,8 @@ CONSOLIDATED_CTE from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from 
src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType VALID_URL_FLAG = ( exists() diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py index 9e267f66..5a83e029 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/muckrock.py @@ -4,10 +4,10 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py index 7a15b67a..fb22379d 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location.py @@ -5,11 +5,11 @@ from 
src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion cte = ( select( @@ -19,10 +19,10 @@ ) ) .join( - AutoLocationIDSubtask, + AnnotationLocationAutoSubtask, and_( - AutoLocationIDSubtask.url_id == URL.id, - AutoLocationIDSubtask.locations_found + AnnotationLocationAutoSubtask.url_id == URL.id, + AnnotationLocationAutoSubtask.locations_found ) ) .where( @@ -32,12 +32,12 @@ LinkAgencyLocation.location_id ) .join( - LocationIDSubtaskSuggestion, - LocationIDSubtaskSuggestion.location_id == LinkAgencyLocation.location_id, + AnnotationLocationAutoSuggestion, + AnnotationLocationAutoSuggestion.location_id == LinkAgencyLocation.location_id, ) .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id, + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSubtask.id == AnnotationLocationAutoSuggestion.subtask_id, ) ) diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py 
b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py index d3b7fe6b..79067aae 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/queries/survey/queries/eligible_counts.py @@ -2,7 +2,7 @@ from src.core.tasks.url.operators.agency_identification.subtasks.queries.survey.queries.ctes.eligible import \ EligibleContainer -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType def sum_count(col: ColumnElement[bool], subtask_type: AutoAgencyIDSubtaskType) -> ColumnElement[int]: diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py b/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py index 9335afcf..d88933eb 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/templates/subtask.py @@ -6,8 +6,8 @@ from src.core.tasks.url.operators.agency_identification.subtasks.models.subtask import AutoAgencyIDSubtaskData from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic -from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall diff --git 
a/src/core/tasks/url/operators/auto_name/clean.py b/src/core/tasks/url/operators/auto_name/clean.py index 2e1820ab..9c745829 100644 --- a/src/core/tasks/url/operators/auto_name/clean.py +++ b/src/core/tasks/url/operators/auto_name/clean.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH +from src.db.models.impl.annotation.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH def clean_title(title: str) -> str: diff --git a/src/core/tasks/url/operators/auto_name/core.py b/src/core/tasks/url/operators/auto_name/core.py index 00af9838..b5702008 100644 --- a/src/core/tasks/url/operators/auto_name/core.py +++ b/src/core/tasks/url/operators/auto_name/core.py @@ -4,8 +4,8 @@ from src.core.tasks.url.operators.auto_name.queries.prereq import AutoNamePrerequisitesQueryBuilder from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.pydantic import URLNameSuggestionPydantic +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.pydantic import URLNameSuggestionPydantic class AutoNameURLTaskOperator(URLTaskOperatorBase): diff --git a/src/core/tasks/url/operators/auto_name/queries/cte.py b/src/core/tasks/url/operators/auto_name/queries/cte.py index 1c7fc503..ff8a958b 100644 --- a/src/core/tasks/url/operators/auto_name/queries/cte.py +++ b/src/core/tasks/url/operators/auto_name/queries/cte.py @@ -2,10 +2,10 @@ from src.db.enums import URLHTMLContentType, TaskType from src.db.helpers.query import no_url_task_error +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from src.db.models.impl.url.core.sqlalchemy import URL from 
src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion class AutoNamePrerequisiteCTEContainer: @@ -24,11 +24,11 @@ def __init__(self): URLHTMLContent.content_type == URLHTMLContentType.TITLE.value, ~exists( select( - URLNameSuggestion.id + AnnotationNameSuggestion.id ) .where( - URLNameSuggestion.url_id == URL.id, - URLNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE.value, + AnnotationNameSuggestion.url_id == URL.id, + AnnotationNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE.value, ) ), no_url_task_error(TaskType.AUTO_NAME) diff --git a/src/core/tasks/url/operators/auto_relevant/core.py b/src/core/tasks/url/operators/auto_relevant/core.py index 3acff217..ea2a80d4 100644 --- a/src/core/tasks/url/operators/auto_relevant/core.py +++ b/src/core/tasks/url/operators/auto_relevant/core.py @@ -5,9 +5,8 @@ from src.core.tasks.url.operators.auto_relevant.sort import separate_success_and_error_subsets from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput from src.db.enums import TaskType -from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic +from src.db.models.impl.annotation.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall from src.external.huggingface.inference.client import HuggingFaceInferenceClient from src.external.huggingface.inference.models.input import BasicInput diff --git a/src/core/tasks/url/operators/auto_relevant/queries/cte.py b/src/core/tasks/url/operators/auto_relevant/queries/cte.py index c8b816fd..a4e14b2d 100644 --- 
a/src/core/tasks/url/operators/auto_relevant/queries/cte.py +++ b/src/core/tasks/url/operators/auto_relevant/queries/cte.py @@ -1,12 +1,11 @@ from sqlalchemy import select, CTE from sqlalchemy.orm import aliased -from src.collectors.enums import URLStatus from src.db.enums import TaskType from src.db.helpers.query import not_exists_url, no_url_task_error +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion class AutoRelevantPrerequisitesCTEContainer: @@ -21,8 +20,7 @@ def __init__(self): URL.id == URLCompressedHTML.url_id ) .where( - URL.status == URLStatus.OK.value, - not_exists_url(AutoRelevantSuggestion), + not_exists_url(AnnotationAutoURLType), no_url_task_error(TaskType.RELEVANCY) ).cte("auto_relevant_prerequisites") ) diff --git a/src/core/tasks/url/operators/auto_relevant/queries/get.py b/src/core/tasks/url/operators/auto_relevant/queries/get.py index b566bb42..1ed115fa 100644 --- a/src/core/tasks/url/operators/auto_relevant/queries/get.py +++ b/src/core/tasks/url/operators/auto_relevant/queries/get.py @@ -4,14 +4,10 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import selectinload -from src.collectors.enums import URLStatus from src.core.tasks.url.operators.auto_relevant.models.tdo import URLRelevantTDO from src.core.tasks.url.operators.auto_relevant.queries.cte import AutoRelevantPrerequisitesCTEContainer -from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import StatementComposer from 
src.db.utils.compression import decompress_html diff --git a/src/core/tasks/url/operators/html/core.py b/src/core/tasks/url/operators/html/core.py index 26f70cdb..5983ab69 100644 --- a/src/core/tasks/url/operators/html/core.py +++ b/src/core/tasks/url/operators/html/core.py @@ -1,6 +1,7 @@ from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.core.tasks.url.operators.html.filter import filter_just_urls, filter_404_subset from src.core.tasks.url.operators.html.queries.insert.query import InsertURLHTMLInfoQueryBuilder +from src.core.tasks.url.operators.html.queries.prerequisites import PendingURLsWithoutHTMLDataPrerequisitesQueryBuilder from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser from src.core.tasks.url.operators.html.tdo import UrlHtmlTDO from src.db.client.async_ import AsyncDatabaseClient @@ -26,7 +27,9 @@ def task_type(self) -> TaskType: return TaskType.HTML async def meets_task_prerequisites(self) -> bool: - return await self.adb_client.has_non_errored_urls_without_html_data() + return await self.run_query_builder( + PendingURLsWithoutHTMLDataPrerequisitesQueryBuilder() + ) async def inner_task_logic(self) -> None: tdos = await self._get_non_errored_urls_without_html_data() diff --git a/src/core/tasks/url/operators/html/queries/get.py b/src/core/tasks/url/operators/html/queries/get.py deleted file mode 100644 index a6cbe4a8..00000000 --- a/src/core/tasks/url/operators/html/queries/get.py +++ /dev/null @@ -1,31 +0,0 @@ -from sqlalchemy.ext.asyncio import AsyncSession - -from src.db.models.impl.url.core.pydantic.info import URLInfo -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import StatementComposer - - -class GetPendingURLsWithoutHTMLDataQueryBuilder(QueryBuilderBase): - - async def run(self, session: AsyncSession) -> list[URLInfo]: - statement = 
StatementComposer.has_non_errored_urls_without_html_data() - statement = statement.limit(100).order_by(URL.id) - scalar_result = await session.scalars(statement) - url_results: list[URL] = scalar_result.all() - - final_results = [] - for url in url_results: - url_info = URLInfo( - id=url.id, - batch_id=url.batch.id if url.batch is not None else None, - url=url.full_url, - collector_metadata=url.collector_metadata, - status=url.status, - created_at=url.created_at, - updated_at=url.updated_at, - name=url.name - ) - final_results.append(url_info) - - return final_results diff --git a/src/db/models/impl/url/suggestion/url_type/auto/__init__.py b/src/core/tasks/url/operators/html/queries/get/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/url_type/auto/__init__.py rename to src/core/tasks/url/operators/html/queries/get/__init__.py diff --git a/src/core/tasks/url/operators/html/queries/get/query.py b/src/core/tasks/url/operators/html/queries/get/query.py new file mode 100644 index 00000000..a4088157 --- /dev/null +++ b/src/core/tasks/url/operators/html/queries/get/query.py @@ -0,0 +1,32 @@ +from sqlalchemy import RowMapping, Sequence +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.html.queries.helpers import has_non_errored_urls_without_html_data +from src.db.models.impl import LinkBatchURL +from src.db.models.impl.url.core.pydantic.info import URLInfo +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from src.db.queries.base.builder import QueryBuilderBase +from src.db.statement_composer import StatementComposer + + +class GetPendingURLsWithoutHTMLDataQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[URLInfo]: + query = ( + has_non_errored_urls_without_html_data() + .limit(100) + .order_by(URL.id) + ) + + mappings: Sequence[RowMapping] = await 
self.sh.mappings(session, query) + + final_results: list[URLInfo] = [] + for mapping in mappings: + url_info = URLInfo( + id=mapping[URL.id], + url=mapping["full_url"], + ) + final_results.append(url_info) + + return final_results diff --git a/src/core/tasks/url/operators/html/queries/helpers.py b/src/core/tasks/url/operators/html/queries/helpers.py new file mode 100644 index 00000000..4c7eb89c --- /dev/null +++ b/src/core/tasks/url/operators/html/queries/helpers.py @@ -0,0 +1,53 @@ +from sqlalchemy import ColumnElement, exists, select, Select + +from src.db.enums import TaskType +from src.db.models.impl import LinkBatchURL +from src.db.models.impl.link.task_url import LinkTaskURL +from src.db.models.impl.task.core import Task +from src.db.models.impl.task.enums import TaskStatus +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView + + +def _exclude_completed_html_task_subquery() -> ColumnElement[bool]: + """Build a NOT EXISTS condition that holds when no completed HTML task is linked to the URL.""" + return ~exists( + select(1) + .select_from( + LinkTaskURL + ) + .join( + Task, + LinkTaskURL.task_id == Task.id + ) + .where( + LinkTaskURL.url_id == URL.id, + Task.task_type == TaskType.HTML.value, + Task.task_status == TaskStatus.COMPLETE.value + ) + ) + +def has_non_errored_urls_without_html_data() -> Select: + """Select id and full_url of URLs with status-200 HTML web metadata, no scrape info row, and no completed HTML task.""" + query = ( + select( + URL.id, + URL.full_url, + ) + .join( + URLWebMetadata, + URLWebMetadata.url_id == URL.id + ) + .outerjoin( + URLScrapeInfo + ) + .where( + URLScrapeInfo.url_id.is_(None), + _exclude_completed_html_task_subquery(), + URLWebMetadata.status_code == 200, + URLWebMetadata.content_type.like("%html%"), + ) + ) + return query diff --git a/src/core/tasks/url/operators/html/queries/prerequisites.py b/src/core/tasks/url/operators/html/queries/prerequisites.py new file mode 100644 index 00000000..5fa0c94a
--- /dev/null +++ b/src/core/tasks/url/operators/html/queries/prerequisites.py @@ -0,0 +1,13 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.url.operators.html.queries.helpers import has_non_errored_urls_without_html_data +from src.db.queries.base.builder import QueryBuilderBase + + +class PendingURLsWithoutHTMLDataPrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + statement = has_non_errored_urls_without_html_data() + statement = statement.limit(1) + scalar_result = await session.scalars(statement) + return bool(scalar_result.first()) \ No newline at end of file diff --git a/src/core/tasks/url/operators/location_id/core.py b/src/core/tasks/url/operators/location_id/core.py index 3833a80c..82f7df13 100644 --- a/src/core/tasks/url/operators/location_id/core.py +++ b/src/core/tasks/url/operators/location_id/core.py @@ -8,7 +8,7 @@ from src.core.tasks.url.operators.location_id.subtasks.templates.subtask import LocationIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType class LocationIdentificationTaskOperator( diff --git a/src/core/tasks/url/operators/location_id/subtasks/flags/core.py b/src/core/tasks/url/operators/location_id/subtasks/flags/core.py index 1b6cb55c..21765643 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/flags/core.py +++ b/src/core/tasks/url/operators/location_id/subtasks/flags/core.py @@ -1,7 +1,7 @@ from environs import Env from src.core.tasks.url.operators.location_id.subtasks.flags.mappings import SUBTASK_TO_ENV_FLAG -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType class 
SubtaskFlagger: diff --git a/src/core/tasks/url/operators/location_id/subtasks/flags/mappings.py b/src/core/tasks/url/operators/location_id/subtasks/flags/mappings.py index 48f5d194..548c4f7b 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/flags/mappings.py +++ b/src/core/tasks/url/operators/location_id/subtasks/flags/mappings.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType SUBTASK_TO_ENV_FLAG: dict[LocationIDSubtaskType, str] = { LocationIDSubtaskType.NLP_LOCATION_FREQUENCY: "LOCATION_ID_NLP_LOCATION_MATCH_FLAG", diff --git a/src/core/tasks/url/operators/location_id/subtasks/impl/batch_link/core.py b/src/core/tasks/url/operators/location_id/subtasks/impl/batch_link/core.py index a85e572a..59a7faf8 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/impl/batch_link/core.py +++ b/src/core/tasks/url/operators/location_id/subtasks/impl/batch_link/core.py @@ -5,8 +5,8 @@ from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion from src.core.tasks.url.operators.location_id.subtasks.templates.subtask import LocationIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic class LocationBatchLinkSubtaskOperator(LocationIDSubtaskOperatorBase): diff --git a/src/core/tasks/url/operators/location_id/subtasks/impl/nlp_location_freq/processor/convert.py 
b/src/core/tasks/url/operators/location_id/subtasks/impl/nlp_location_freq/processor/convert.py index 8ec60b35..26b0ff32 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/impl/nlp_location_freq/processor/convert.py +++ b/src/core/tasks/url/operators/location_id/subtasks/impl/nlp_location_freq/processor/convert.py @@ -17,8 +17,8 @@ SearchSimilarLocationsResponse from src.core.tasks.url.operators.location_id.subtasks.models.subtask import AutoLocationIDSubtaskData from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic def convert_invalid_url_nlp_mappings_to_subtask_data_list( diff --git a/src/core/tasks/url/operators/location_id/subtasks/loader.py b/src/core/tasks/url/operators/location_id/subtasks/loader.py index 408b5a07..38ea8bed 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/loader.py +++ b/src/core/tasks/url/operators/location_id/subtasks/loader.py @@ -4,7 +4,7 @@ from src.core.tasks.url.operators.location_id.subtasks.impl.nlp_location_freq.processor.nlp.core import NLPProcessor from src.core.tasks.url.operators.location_id.subtasks.templates.subtask import LocationIDSubtaskOperatorBase from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType class LocationIdentificationSubtaskLoader: diff --git a/src/core/tasks/url/operators/location_id/subtasks/models/subtask.py 
b/src/core/tasks/url/operators/location_id/subtasks/models/subtask.py index b06d2ff9..fa935ecb 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/models/subtask.py +++ b/src/core/tasks/url/operators/location_id/subtasks/models/subtask.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic class AutoLocationIDSubtaskData(BaseModel): diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/constants.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/constants.py index b9f85e2d..f3093b03 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/constants.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/constants.py @@ -1,5 +1,5 @@ # Determines priority of subtasks, all else being equal. 
-from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType SUBTASK_HIERARCHY: list[LocationIDSubtaskType] = [ LocationIDSubtaskType.NLP_LOCATION_FREQUENCY, diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/core.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/core.py index c267b89e..44cb0627 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/core.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/core.py @@ -6,7 +6,7 @@ from src.core.tasks.url.operators.location_id.subtasks.queries.survey.constants import SUBTASK_HIERARCHY_MAPPING from src.core.tasks.url.operators.location_id.subtasks.queries.survey.queries.eligible_counts import \ ELIGIBLE_COUNTS_QUERY -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType from src.db.queries.base.builder import QueryBuilderBase from src.db.helpers.session import session_helper as sh diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py index 7d0dddfd..668e9e69 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/exists/high_confidence_annotations.py @@ -2,24 +2,24 @@ from src.core.tasks.url.operators._shared.container.subtask.exists import \ URLsSubtaskExistsCTEContainer +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from 
src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion cte = ( select( URL.id ) .join( - AutoLocationIDSubtask, - AutoLocationIDSubtask.url_id == URL.id, + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSubtask.url_id == URL.id, ) .join( - LocationIDSubtaskSuggestion, - LocationIDSubtaskSuggestion.subtask_id == AutoLocationIDSubtask.id, + AnnotationLocationAutoSuggestion, + AnnotationLocationAutoSuggestion.subtask_id == AnnotationLocationAutoSubtask.id, ) .where( - LocationIDSubtaskSuggestion.confidence >= 95, + AnnotationLocationAutoSuggestion.confidence >= 95, ) .cte("high_confidence_annotations_exists") ) diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/helpers.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/helpers.py index acd73c4b..54f114b8 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/helpers.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/helpers.py @@ -1,8 +1,8 @@ from sqlalchemy import ColumnElement, exists +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask def get_exists_subtask_query( @@ -11,8 +11,8 @@ def get_exists_subtask_query( return ( exists() 
.where( - AutoLocationIDSubtask.url_id == URL.id, - AutoLocationIDSubtask.type == subtask_type, + AnnotationLocationAutoSubtask.url_id == URL.id, + AnnotationLocationAutoSubtask.type == subtask_type, ) .label("subtask_entry_exists") ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py index 14c2f260..6d08cc76 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/batch_link.py @@ -3,10 +3,10 @@ from src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.location_id.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location_freq.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location_freq.py index 7ab2e0eb..72b4cd81 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location_freq.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/ctes/subtask/impl/nlp_location_freq.py @@ -3,9 +3,9 @@ from 
src.core.tasks.url.operators._shared.container.subtask.eligible import URLsSubtaskEligibleCTEContainer from src.core.tasks.url.operators.location_id.subtasks.queries.survey.queries.ctes.subtask.helpers import \ get_exists_subtask_query +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType cte = ( select( diff --git a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/eligible_counts.py b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/eligible_counts.py index b803b7f2..97c47a33 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/eligible_counts.py +++ b/src/core/tasks/url/operators/location_id/subtasks/queries/survey/queries/eligible_counts.py @@ -1,7 +1,7 @@ from sqlalchemy import ColumnElement, func, Integer, select from src.core.tasks.url.operators.location_id.subtasks.queries.survey.queries.ctes.eligible import EligibleContainer -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType def sum_count(col: ColumnElement[bool], subtask_type: LocationIDSubtaskType) -> ColumnElement[int]: diff --git a/src/core/tasks/url/operators/location_id/subtasks/templates/subtask.py b/src/core/tasks/url/operators/location_id/subtasks/templates/subtask.py index 8ee856c2..a5fb050b 100644 --- a/src/core/tasks/url/operators/location_id/subtasks/templates/subtask.py +++ b/src/core/tasks/url/operators/location_id/subtasks/templates/subtask.py @@ -7,8 +7,8 @@ from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion from src.db.client.async_ import AsyncDatabaseClient 
from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic -from src.db.models.impl.url.suggestion.location.auto.suggestion.pydantic import LocationIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.suggestion.pydantic import LocationIDSubtaskSuggestionPydantic from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall diff --git a/src/core/tasks/url/operators/record_type/core.py b/src/core/tasks/url/operators/record_type/core.py index 9f63a6a5..d6097ab0 100644 --- a/src/core/tasks/url/operators/record_type/core.py +++ b/src/core/tasks/url/operators/record_type/core.py @@ -7,7 +7,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.with_html import URLWithHTML from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall @@ -72,9 +72,9 @@ async def put_results_into_database( record_type = tdo.record_type url_and_record_type_list.append((url_id, record_type)) # Add to database - suggestions: list[AutoRecordTypeSuggestion] = [] + suggestions: list[AnnotationAutoRecordType] = [] for url_id, record_type in url_and_record_type_list: - suggestion = AutoRecordTypeSuggestion( + suggestion = AnnotationAutoRecordType( url_id=url_id, record_type=record_type.value ) diff --git a/src/core/tasks/url/operators/record_type/queries/cte.py b/src/core/tasks/url/operators/record_type/queries/cte.py index 22d3db10..710dab03 100644 --- a/src/core/tasks/url/operators/record_type/queries/cte.py +++ 
b/src/core/tasks/url/operators/record_type/queries/cte.py @@ -4,7 +4,7 @@ from src.db.helpers.query import not_exists_url, no_url_task_error from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType class RecordTypeTaskPrerequisiteCTEContainer: @@ -18,7 +18,7 @@ def __init__(self): URLCompressedHTML ) .where( - not_exists_url(AutoRecordTypeSuggestion), + not_exists_url(AnnotationAutoRecordType), no_url_task_error( TaskType.RECORD_TYPE ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py index 36fe0a87..440e908a 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py @@ -2,31 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - UserURLAgencySuggestion.url_id, - UserURLAgencySuggestion.agency_id.label("entity"), + AnnotationAgencyUser.url_id, + AnnotationAgencyUser.agency_id.label("entity"), func.count().label("votes") ) .group_by( - UserURLAgencySuggestion.url_id, - 
UserURLAgencySuggestion.agency_id + AnnotationAgencyUser.url_id, + AnnotationAgencyUser.agency_id ) ) _anon_counts = ( select( - AnonymousAnnotationAgency.url_id, - AnonymousAnnotationAgency.agency_id.label("entity"), + AnnotationAgencyAnon.url_id, + AnnotationAgencyAnon.agency_id.label("entity"), (func.count() / ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnonymousAnnotationAgency.url_id, - AnonymousAnnotationAgency.agency_id + AnnotationAgencyAnon.url_id, + AnnotationAgencyAnon.agency_id ) ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/location.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/location.py index 4e180e18..496b14e1 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/location.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/location.py @@ -2,32 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - UserLocationSuggestion.url_id, - UserLocationSuggestion.location_id.label("entity"), + AnnotationLocationUser.url_id, + AnnotationLocationUser.location_id.label("entity"), func.count().label("votes") ) .group_by( - UserLocationSuggestion.url_id, - UserLocationSuggestion.location_id + AnnotationLocationUser.url_id, + 
AnnotationLocationUser.location_id ) ) _anon_counts = ( select( - AnonymousAnnotationLocation.url_id, - AnonymousAnnotationLocation.location_id.label("entity"), + AnnotationLocationAnon.url_id, + AnnotationLocationAnon.location_id.label("entity"), (func.count() / ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnonymousAnnotationLocation.url_id, - AnonymousAnnotationLocation.location_id + AnnotationLocationAnon.url_id, + AnnotationLocationAnon.location_id ) ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py index 5cb014f1..606105d0 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/name.py @@ -1,28 +1,76 @@ from sqlalchemy import select, func from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.views.unvalidated_url import UnvalidatedURL +_user_counts = ( + select( + AnnotationNameSuggestion.url_id, + AnnotationNameSuggestion.suggestion.label("entity"), + func.count().label("votes") + ) + .join( + AnnotationNameUserEndorsement, + AnnotationNameUserEndorsement.suggestion_id == AnnotationNameSuggestion.id + ) + .group_by( + AnnotationNameSuggestion.url_id, + AnnotationNameSuggestion.suggestion + ) + .cte("user_counts") +) + +_anon_counts = ( + select( + AnnotationNameSuggestion.url_id, + AnnotationNameSuggestion.suggestion.label("entity"), + 
func.count().label("votes") + ) + .join( + AnnotationNameAnonEndorsement, + AnnotationNameAnonEndorsement.suggestion_id == AnnotationNameSuggestion.id + ) + .group_by( + AnnotationNameSuggestion.url_id, + AnnotationNameSuggestion.suggestion + ) + .cte("anon_counts") +) + +_union_counts = ( + select( + _user_counts.c.url_id, + _user_counts.c.entity, + _user_counts.c.votes + ) + .union_all( + select( + _anon_counts.c.url_id, + _anon_counts.c.entity, + _anon_counts.c.votes + ) + ) + .cte("counts_name_union") +) + + NAME_VALIDATION_COUNTS_CTE = ValidatedCountsCTEContainer( ( select( - URLNameSuggestion.url_id, - URLNameSuggestion.suggestion.label("entity"), - func.count().label("votes") + _union_counts.c.url_id, + _union_counts.c.entity, + func.sum(_union_counts.c.votes).label("votes") ) .join( UnvalidatedURL, - URLNameSuggestion.url_id == UnvalidatedURL.url_id - ) - .join( - LinkUserNameSuggestion, - LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id + _union_counts.c.url_id == UnvalidatedURL.url_id ) .group_by( - URLNameSuggestion.url_id, - URLNameSuggestion.suggestion + _union_counts.c.url_id, + _union_counts.c.entity, ) ).cte("counts_name") ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py index 65b1f9b0..19455587 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/record_type.py @@ -2,31 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from 
src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - UserRecordTypeSuggestion.url_id, - UserRecordTypeSuggestion.record_type.label("entity"), + AnnotationRecordTypeUser.url_id, + AnnotationRecordTypeUser.record_type.label("entity"), func.count().label("votes") ) .group_by( - UserRecordTypeSuggestion.url_id, - UserRecordTypeSuggestion.record_type + AnnotationRecordTypeUser.url_id, + AnnotationRecordTypeUser.record_type ) ) _anon_counts = ( select( - AnonymousAnnotationRecordType.url_id, - AnonymousAnnotationRecordType.record_type.label("entity"), + AnnotationRecordTypeAnon.url_id, + AnnotationRecordTypeAnon.record_type.label("entity"), (func.count() * ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnonymousAnnotationRecordType.url_id, - AnonymousAnnotationRecordType.record_type + AnnotationRecordTypeAnon.url_id, + AnnotationRecordTypeAnon.record_type ) ) diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py index 72638f19..bd92fdb8 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/url_type.py @@ -2,31 +2,31 @@ from src.core.tasks.url.operators.validate.queries.ctes.counts.constants import ANONYMOUS_VOTE_RATIO from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from 
src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.views.unvalidated_url import UnvalidatedURL _user_counts = ( select( - UserURLTypeSuggestion.url_id, - UserURLTypeSuggestion.type.label("entity"), + AnnotationURLTypeUser.url_id, + AnnotationURLTypeUser.type.label("entity"), func.count().label("votes") ) .group_by( - UserURLTypeSuggestion.url_id, - UserURLTypeSuggestion.type + AnnotationURLTypeUser.url_id, + AnnotationURLTypeUser.type ) ) _anon_counts = ( select( - AnonymousAnnotationURLType.url_id, - AnonymousAnnotationURLType.url_type.label("entity"), + AnnotationURLTypeAnon.url_id, + AnnotationURLTypeAnon.url_type.label("entity"), (func.count() / ANONYMOUS_VOTE_RATIO).label("votes") ) .group_by( - AnonymousAnnotationURLType.url_id, - AnonymousAnnotationURLType.url_type + AnnotationURLTypeAnon.url_id, + AnnotationURLTypeAnon.url_type ) ) diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 125c594e..6377fa60 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -1,7 +1,6 @@ from datetime import datetime from functools import wraps from typing import Optional, Any, List -from uuid import UUID, uuid4 from sqlalchemy import select, func, Select, and_, update, Row, text from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker, AsyncEngine @@ -37,26 +36,27 @@ from src.api.endpoints.task.dtos.get.tasks import GetTasksResponse, GetTasksResponseTaskInfo from src.api.endpoints.url.get.dto import GetURLsResponseInfo from src.api.endpoints.url.get.query import GetURLsQueryBuilder -from src.collectors.enums import URLStatus, CollectorType +from src.collectors.enums import CollectorType from src.collectors.queries.insert.urls.query import InsertURLsQueryBuilder from src.core.enums import BatchStatus, RecordType from src.core.env_var_manager import EnvVarManager from src.core.tasks.scheduled.impl.huggingface.queries.state import 
SetHuggingFaceUploadStateQueryBuilder from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo -from src.core.tasks.url.operators.html.queries.get import \ +from src.core.tasks.url.operators.html.queries.get.query import \ GetPendingURLsWithoutHTMLDataQueryBuilder from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO from src.db.client.helpers import add_standard_limit_and_offset from src.db.client.types import UserSuggestionModel from src.db.config_manager import ConfigManager from src.db.constants import PLACEHOLDER_AGENCY_NAME -from src.db.dtos.url.html_content import URLHTMLContentInfo from src.db.dtos.url.insert import InsertURLsInfo from src.db.dtos.url.raw_html import RawHTMLInfo from src.db.enums import TaskType from src.db.helpers.session import session_helper as sh from src.db.models.impl.agency.enums import AgencyType, JurisdictionType from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput from src.db.models.impl.backlog_snapshot import BacklogSnapshot from src.db.models.impl.batch.pydantic.info import BatchInfo from src.db.models.impl.batch.sqlalchemy import Batch @@ -75,19 +75,15 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.anonymous import AnonymousSession -from src.db.models.impl.url.suggestion.record_type.auto import 
AutoRecordTypeSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.models.templates_.base import Base -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.get.recent_batch_summaries.builder import GetRecentBatchSummariesQueryBuilder from src.db.queries.implementations.core.metrics.urls.aggregated.pending import \ @@ -231,7 +227,7 @@ async def add_user_relevant_suggestions( inputs: list[AutoRelevancyAnnotationInput] ): models = [ - AutoRelevantSuggestion( + AnnotationAutoURLType( url_id=input_.url_id, relevant=input_.is_relevant, confidence=input_.confidence, @@ -267,15 +263,15 @@ async def add_user_relevant_suggestion( ): prior_suggestion = await self.get_user_suggestion( session, - model=UserURLTypeSuggestion, + model=AnnotationURLTypeUser, user_id=user_id, url_id=url_id ) if prior_suggestion is not None: - prior_suggestion.type = suggested_status.value + prior_suggestion.type = suggested_status.value return -
suggestion = UserURLTypeSuggestion( + suggestion = AnnotationURLTypeUser( url_id=url_id, user_id=user_id, type=suggested_status.value @@ -292,7 +288,7 @@ async def add_auto_record_type_suggestion( url_id: int, record_type: RecordType ): - suggestion = AutoRecordTypeSuggestion( + suggestion = AnnotationAutoRecordType( url_id=url_id, record_type=record_type.value ) @@ -308,7 +304,7 @@ async def add_user_record_type_suggestion( ): prior_suggestion = await self.get_user_suggestion( session, - model=UserRecordTypeSuggestion, + model=AnnotationRecordTypeUser, user_id=user_id, url_id=url_id ) @@ -316,7 +312,7 @@ async def add_user_record_type_suggestion( prior_suggestion.record_type = record_type.value return - suggestion = UserRecordTypeSuggestion( + suggestion = AnnotationRecordTypeUser( url_id=url_id, user_id=user_id, record_type=record_type.value @@ -325,14 +321,6 @@ async def add_user_record_type_suggestion( # endregion record_type - - @session_manager - async def has_non_errored_urls_without_html_data(self, session: AsyncSession) -> bool: - statement = self.statement_composer.has_non_errored_urls_without_html_data() - statement = statement.limit(1) - scalar_result = await session.scalars(statement) - return bool(scalar_result.first()) - @session_manager async def add_miscellaneous_metadata(self, session: AsyncSession, tdos: list[URLMiscellaneousMetadataTDO]): updates = [] @@ -570,7 +558,7 @@ async def add_agency_manual_suggestion( ) await session.merge(agency) - url_agency_suggestion = UserURLAgencySuggestion( + url_agency_suggestion = AnnotationAgencyUser( url_id=url_id, agency_id=agency_id, user_id=user_id, @@ -704,7 +692,7 @@ async def get_batch_summaries( session, page: int, collector_type: CollectorType | None = None, - status: BatchURLStatusEnum | None = None, + status: BatchURLStatusViewEnum | None = None, ) -> GetBatchSummariesResponse: # Get only the batch_id, collector_type, status, and created_at builder = GetRecentBatchSummariesQueryBuilder( @@ -835,7 
+823,6 @@ async def populate_backlog_snapshot( ) .outerjoin(FlagURLValidated, URL.id == FlagURLValidated.url_id) .where( - URL.status == URLStatus.OK.value, FlagURLValidated.url_id.is_(None), ) ) diff --git a/src/db/client/sync.py b/src/db/client/sync.py index e29909cf..c5d90167 100644 --- a/src/db/client/sync.py +++ b/src/db/client/sync.py @@ -124,7 +124,6 @@ def insert_url(self, session, url_info: URLInfo) -> int: url=url_and_scheme.url, scheme=url_and_scheme.scheme, collector_metadata=url_info.collector_metadata, - status=url_info.status, name=url_info.name, trailing_slash=url_and_scheme.url.endswith('/'), source=url_info.source diff --git a/src/db/client/types.py b/src/db/client/types.py index e4f70301..0f132aeb 100644 --- a/src/db/client/types.py +++ b/src/db/client/types.py @@ -1,5 +1,5 @@ -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser -UserSuggestionModel = UserURLTypeSuggestion or UserRecordTypeSuggestion or UserURLAgencySuggestion +UserSuggestionModel = AnnotationURLTypeUser | AnnotationRecordTypeUser | AnnotationAgencyUser diff --git a/src/db/constants.py b/src/db/constants.py index c8821e7e..cf073fb7 100644 --- a/src/db/constants.py +++ b/src/db/constants.py @@ -1,13 +1,13 @@ -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy
import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser PLACEHOLDER_AGENCY_NAME = "PLACEHOLDER_AGENCY_NAME" STANDARD_ROW_LIMIT = 100 USER_ANNOTATION_MODELS = [ - UserURLTypeSuggestion, - UserRecordTypeSuggestion, - UserURLAgencySuggestion + AnnotationURLTypeUser, + AnnotationRecordTypeUser, + AnnotationAgencyUser ] \ No newline at end of file diff --git a/src/db/dto_converter.py b/src/db/dto_converter.py index 4c91a353..eec2f32c 100644 --- a/src/db/dto_converter.py +++ b/src/db/dto_converter.py @@ -1,23 +1,19 @@ from collections import Counter -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo -from src.api.endpoints.review.next.dto import FinalReviewAnnotationRelevantInfo, FinalReviewAnnotationRecordTypeInfo, \ - FinalReviewAnnotationAgencyInfo -from src.core.enums import RecordType, SuggestionType +from src.api.endpoints.review.next.dto import FinalReviewAnnotationRelevantInfo, FinalReviewAnnotationRecordTypeInfo +from src.core.enums import RecordType from src.core.tasks.url.operators.html.scraper.parser.dtos.response_html import ResponseHTMLInfo from src.core.tasks.url.operators.html.scraper.parser.mapping import ENUM_TO_ATTRIBUTE_MAPPING from src.db.dtos.url.html_content import URLHTMLContentInfo from src.db.dtos.url.with_html import URLWithHTML -from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion -from 
src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser class DTOConverter: @@ -28,8 +24,8 @@ class DTOConverter: @staticmethod def final_review_annotation_relevant_info( - user_suggestions: list[UserURLTypeSuggestion], - auto_suggestion: AutoRelevantSuggestion + user_suggestions: list[AnnotationURLTypeUser], + auto_suggestion: AnnotationAutoURLType ) -> FinalReviewAnnotationRelevantInfo: auto_value = RelevanceAnnotationResponseInfo( @@ -48,8 +44,8 @@ def final_review_annotation_relevant_info( @staticmethod def final_review_annotation_record_type_info( - user_suggestions: list[UserRecordTypeSuggestion], - auto_suggestion: AutoRecordTypeSuggestion + user_suggestions: list[AnnotationRecordTypeUser], + auto_suggestion: AnnotationAutoRecordType ): if auto_suggestion is None: diff --git a/src/db/enums.py b/src/db/enums.py index 65f446c5..97e2cc4b 100644 --- a/src/db/enums.py +++ b/src/db/enums.py @@ -75,6 +75,7 @@ class TaskType(PyEnum): SYNC_META_URLS_ADD = "Sync Meta URLs Add" SYNC_META_URLS_UPDATE = "Sync Meta URLs Update" SYNC_META_URLS_DELETE = "Sync Meta URLs Delete" + SYNC_USER_FOLLOWS_GET = "Sync User Follows Get" class ChangeLogOperationType(PyEnum): INSERT = "INSERT" diff --git a/src/db/models/impl/__init__.py b/src/db/models/impl/__init__.py index e69de29b..5b4c9604 100644 --- a/src/db/models/impl/__init__.py +++ b/src/db/models/impl/__init__.py @@ -0,0 +1,4 @@ + +from 
.link.location_batch.sqlalchemy import LinkLocationBatch +from .link.batch_url.sqlalchemy import LinkBatchURL +from .anon_session.sqlalchemy import AnonymousSession \ No newline at end of file diff --git a/src/db/models/impl/agency/sqlalchemy.py b/src/db/models/impl/agency/sqlalchemy.py index 9e99a0be..d6ccbc3b 100644 --- a/src/db/models/impl/agency/sqlalchemy.py +++ b/src/db/models/impl/agency/sqlalchemy.py @@ -2,7 +2,7 @@ References an agency in the data sources database. """ -from sqlalchemy import Column, Integer, String, DateTime, Sequence +from sqlalchemy import Column, String from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import enum_column @@ -27,8 +27,8 @@ class Agency( ) # Relationships - automated_suggestions = relationship("AgencyIDSubtaskSuggestion") - user_suggestions = relationship("UserURLAgencySuggestion", back_populates="agency") + automated_suggestions = relationship("AnnotationAgencyAutoSuggestion") + user_suggestions = relationship("AnnotationAgencyUser", back_populates="agency") confirmed_urls = relationship("LinkURLAgency", back_populates="agency") locations = relationship( diff --git a/src/db/models/impl/url/suggestion/README.md b/src/db/models/impl/annotation/README.md similarity index 100% rename from src/db/models/impl/url/suggestion/README.md rename to src/db/models/impl/annotation/README.md diff --git a/src/db/models/impl/url/suggestion/url_type/auto/pydantic/__init__.py b/src/db/models/impl/annotation/__init__.py similarity index 100% rename from src/db/models/impl/url/suggestion/url_type/auto/pydantic/__init__.py rename to src/db/models/impl/annotation/__init__.py diff --git a/src/db/models/views/batch_url_status/__init__.py b/src/db/models/impl/annotation/agency/__init__.py similarity index 100% rename from src/db/models/views/batch_url_status/__init__.py rename to src/db/models/impl/annotation/agency/__init__.py diff --git a/src/db/models/views/url_status/__init__.py 
b/src/db/models/impl/annotation/agency/anon/__init__.py similarity index 100% rename from src/db/models/views/url_status/__init__.py rename to src/db/models/impl/annotation/agency/anon/__init__.py diff --git a/src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py b/src/db/models/impl/annotation/agency/anon/sqlalchemy.py similarity index 83% rename from src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py rename to src/db/models/impl/annotation/agency/anon/sqlalchemy.py index 6f750289..a2da332b 100644 --- a/src/db/models/impl/url/suggestion/anonymous/agency/sqlalchemy.py +++ b/src/db/models/impl/annotation/agency/anon/sqlalchemy.py @@ -4,14 +4,14 @@ from src.db.models.templates_.base import Base -class AnonymousAnnotationAgency( +class AnnotationAgencyAnon( Base, URLDependentMixin, AgencyDependentMixin, CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "anonymous_annotation_agency" + __tablename__ = "annotation__agency__anon" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "agency_id"), ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/update_url_status/__init__.py b/src/db/models/impl/annotation/agency/auto/__init__.py similarity index 100% rename from tests/automated/integration/tasks/scheduled/impl/update_url_status/__init__.py rename to src/db/models/impl/annotation/agency/auto/__init__.py diff --git a/tests/automated/integration/tasks/url/impl/html/check/__init__.py b/src/db/models/impl/annotation/agency/auto/subtask/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/impl/html/check/__init__.py rename to src/db/models/impl/annotation/agency/auto/subtask/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/subtask/enum.py b/src/db/models/impl/annotation/agency/auto/subtask/enum.py similarity index 100% rename from src/db/models/impl/url/suggestion/agency/subtask/enum.py rename to 
src/db/models/impl/annotation/agency/auto/subtask/enum.py diff --git a/src/db/models/impl/url/suggestion/agency/subtask/pydantic.py b/src/db/models/impl/annotation/agency/auto/subtask/pydantic.py similarity index 61% rename from src/db/models/impl/url/suggestion/agency/subtask/pydantic.py rename to src/db/models/impl/annotation/agency/auto/subtask/pydantic.py index f2e9be57..4faee30d 100644 --- a/src/db/models/impl/url/suggestion/agency/subtask/pydantic.py +++ b/src/db/models/impl/annotation/agency/auto/subtask/pydantic.py @@ -1,5 +1,5 @@ -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -14,4 +14,4 @@ class URLAutoAgencyIDSubtaskPydantic(BulkInsertableModel): @classmethod def sa_model(cls) -> type_alias[Base]: - return URLAutoAgencyIDSubtask \ No newline at end of file + return AnnotationAgencyAutoSubtask \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py b/src/db/models/impl/annotation/agency/auto/subtask/sqlalchemy.py similarity index 76% rename from src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py rename to src/db/models/impl/annotation/agency/auto/subtask/sqlalchemy.py index 7a297ef1..56383a6a 100644 --- a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py +++ b/src/db/models/impl/annotation/agency/auto/subtask/sqlalchemy.py @@ -1,20 +1,20 @@ from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import enum_column -from src.db.models.impl.url.suggestion.agency.subtask.enum import 
AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, TaskDependentMixin from src.db.models.templates_.with_id import WithIDBase import sqlalchemy as sa -class URLAutoAgencyIDSubtask( +class AnnotationAgencyAutoSubtask( WithIDBase, URLDependentMixin, TaskDependentMixin, CreatedAtMixin ): - __tablename__ = "url_auto_agency_id_subtasks" + __tablename__ = "annotation__agency__auto__subtasks" type: Mapped[AutoAgencyIDSubtaskType] = enum_column( AutoAgencyIDSubtaskType, @@ -30,6 +30,6 @@ class URLAutoAgencyIDSubtask( ) suggestions = relationship( - "AgencyIDSubtaskSuggestion", + "AnnotationAgencyAutoSuggestion", cascade="all, delete-orphan" ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/__init__.py b/src/db/models/impl/annotation/agency/auto/suggestion/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/__init__.py rename to src/db/models/impl/annotation/agency/auto/suggestion/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py b/src/db/models/impl/annotation/agency/auto/suggestion/pydantic.py similarity index 69% rename from src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py rename to src/db/models/impl/annotation/agency/auto/suggestion/pydantic.py index 5a0fd2b8..1ec38502 100644 --- a/src/db/models/impl/url/suggestion/agency/suggestion/pydantic.py +++ b/src/db/models/impl/annotation/agency/auto/suggestion/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.templates_.base import Base from 
src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -13,4 +13,4 @@ class AgencyIDSubtaskSuggestionPydantic( @classmethod def sa_model(cls) -> type[Base]: """Defines the SQLAlchemy model.""" - return AgencyIDSubtaskSuggestion \ No newline at end of file + return AnnotationAgencyAutoSuggestion \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py b/src/db/models/impl/annotation/agency/auto/suggestion/sqlalchemy.py similarity index 77% rename from src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py rename to src/db/models/impl/annotation/agency/auto/suggestion/sqlalchemy.py index 3f8b8186..5cb715a5 100644 --- a/src/db/models/impl/url/suggestion/agency/suggestion/sqlalchemy.py +++ b/src/db/models/impl/annotation/agency/auto/suggestion/sqlalchemy.py @@ -5,17 +5,17 @@ from src.db.models.templates_.with_id import WithIDBase -class AgencyIDSubtaskSuggestion( +class AnnotationAgencyAutoSuggestion( WithIDBase, CreatedAtMixin, AgencyDependentMixin, ): - __tablename__ = "agency_id_subtask_suggestions" + __tablename__ = "annotation__agency__auto__suggestions" subtask_id = sa.Column( sa.Integer, - sa.ForeignKey("url_auto_agency_id_subtasks.id"), + sa.ForeignKey("annotation__agency__auto__subtasks.id"), nullable=False ) confidence = sa.Column( diff --git a/tests/automated/integration/tasks/url/impl/html/setup/__init__.py b/src/db/models/impl/annotation/agency/user/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/impl/html/setup/__init__.py rename to src/db/models/impl/annotation/agency/user/__init__.py diff --git a/src/db/models/impl/url/suggestion/agency/user.py b/src/db/models/impl/annotation/agency/user/sqlalchemy.py similarity index 79% rename from src/db/models/impl/url/suggestion/agency/user.py rename to src/db/models/impl/annotation/agency/user/sqlalchemy.py index 79fa933c..6b00e06c 100644 --- a/src/db/models/impl/url/suggestion/agency/user.py +++ 
b/src/db/models/impl/annotation/agency/user/sqlalchemy.py @@ -4,11 +4,10 @@ from src.db.models.helpers import get_agency_id_foreign_column from src.db.models.mixins import URLDependentMixin from src.db.models.templates_.base import Base -from src.db.models.templates_.with_id import WithIDBase -class UserURLAgencySuggestion(URLDependentMixin, Base): - __tablename__ = "user_url_agency_suggestions" +class AnnotationAgencyUser(URLDependentMixin, Base): + __tablename__ = "annotation__agency__user" __table_args__ = ( PrimaryKeyConstraint("agency_id", "url_id", "user_id"), ) diff --git a/tests/automated/integration/tasks/url/impl/html/setup/models/__init__.py b/src/db/models/impl/annotation/location/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/impl/html/setup/models/__init__.py rename to src/db/models/impl/annotation/location/__init__.py diff --git a/tests/automated/integration/tasks/url/impl/probe/models/__init__.py b/src/db/models/impl/annotation/location/anon/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/impl/probe/models/__init__.py rename to src/db/models/impl/annotation/location/anon/__init__.py diff --git a/src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py b/src/db/models/impl/annotation/location/anon/sqlalchemy.py similarity index 82% rename from src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py rename to src/db/models/impl/annotation/location/anon/sqlalchemy.py index 3e39810b..6855b021 100644 --- a/src/db/models/impl/url/suggestion/anonymous/location/sqlalchemy.py +++ b/src/db/models/impl/annotation/location/anon/sqlalchemy.py @@ -4,7 +4,7 @@ from src.db.models.templates_.base import Base -class AnonymousAnnotationLocation( +class AnnotationLocationAnon( Base, URLDependentMixin, LocationDependentMixin, @@ -12,7 +12,7 @@ class AnonymousAnnotationLocation( AnonymousSessionMixin ): - __tablename__ = "anonymous_annotation_location" + __tablename__ = 
"annotation__location__anon" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "location_id"), ) \ No newline at end of file diff --git a/src/db/models/impl/annotation/location/auto/__init__.py b/src/db/models/impl/annotation/location/auto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/location/auto/subtask/__init__.py b/src/db/models/impl/annotation/location/auto/subtask/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/constants.py b/src/db/models/impl/annotation/location/auto/subtask/constants.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/subtask/constants.py rename to src/db/models/impl/annotation/location/auto/subtask/constants.py diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/enums.py b/src/db/models/impl/annotation/location/auto/subtask/enums.py similarity index 100% rename from src/db/models/impl/url/suggestion/location/auto/subtask/enums.py rename to src/db/models/impl/annotation/location/auto/subtask/enums.py diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/pydantic.py b/src/db/models/impl/annotation/location/auto/subtask/pydantic.py similarity index 60% rename from src/db/models/impl/url/suggestion/location/auto/subtask/pydantic.py rename to src/db/models/impl/annotation/location/auto/subtask/pydantic.py index 091a00b9..8bf8c1ed 100644 --- a/src/db/models/impl/url/suggestion/location/auto/subtask/pydantic.py +++ b/src/db/models/impl/annotation/location/auto/subtask/pydantic.py @@ -1,5 +1,5 @@ -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from 
src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -16,4 +16,4 @@ class AutoLocationIDSubtaskPydantic( @classmethod def sa_model(cls) -> type[Base]: """Defines the SQLAlchemy model.""" - return AutoLocationIDSubtask \ No newline at end of file + return AnnotationLocationAutoSubtask \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py b/src/db/models/impl/annotation/location/auto/subtask/sqlalchemy.py similarity index 63% rename from src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py rename to src/db/models/impl/annotation/location/auto/subtask/sqlalchemy.py index b7412d1e..61654851 100644 --- a/src/db/models/impl/url/suggestion/location/auto/subtask/sqlalchemy.py +++ b/src/db/models/impl/annotation/location/auto/subtask/sqlalchemy.py @@ -2,20 +2,20 @@ from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import enum_column -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.mixins import CreatedAtMixin, TaskDependentMixin, URLDependentMixin from src.db.models.templates_.with_id import WithIDBase -class AutoLocationIDSubtask( +class AnnotationLocationAutoSubtask( WithIDBase, CreatedAtMixin, TaskDependentMixin, URLDependentMixin, ): - __tablename__ = 'auto_location_id_subtasks' + __tablename__ = 'annotation__location__auto__subtasks' locations_found = Column(Boolean(), nullable=False) type: Mapped[LocationIDSubtaskType] = 
enum_column( @@ -24,5 +24,5 @@ class AutoLocationIDSubtask( ) suggestions = relationship( - LocationIDSubtaskSuggestion + AnnotationLocationAutoSuggestion ) \ No newline at end of file diff --git a/src/db/models/impl/annotation/location/auto/suggestion/__init__.py b/src/db/models/impl/annotation/location/auto/suggestion/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/location/auto/suggestion/pydantic.py b/src/db/models/impl/annotation/location/auto/suggestion/pydantic.py similarity index 68% rename from src/db/models/impl/url/suggestion/location/auto/suggestion/pydantic.py rename to src/db/models/impl/annotation/location/auto/suggestion/pydantic.py index 1ddc53d7..792e3bd4 100644 --- a/src/db/models/impl/url/suggestion/location/auto/suggestion/pydantic.py +++ b/src/db/models/impl/annotation/location/auto/suggestion/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -12,4 +12,4 @@ class LocationIDSubtaskSuggestionPydantic(BulkInsertableModel): @classmethod def sa_model(cls) -> type[Base]: """Defines the SQLAlchemy model.""" - return LocationIDSubtaskSuggestion \ No newline at end of file + return AnnotationLocationAutoSuggestion \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py b/src/db/models/impl/annotation/location/auto/suggestion/sqlalchemy.py similarity index 78% rename from src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py rename to src/db/models/impl/annotation/location/auto/suggestion/sqlalchemy.py index 0d5ea926..f76d9eef 100644 --- 
a/src/db/models/impl/url/suggestion/location/auto/suggestion/sqlalchemy.py +++ b/src/db/models/impl/annotation/location/auto/suggestion/sqlalchemy.py @@ -5,11 +5,11 @@ from src.db.models.templates_.base import Base -class LocationIDSubtaskSuggestion( +class AnnotationLocationAutoSuggestion( Base, ): - __tablename__ = 'location_id_subtask_suggestions' + __tablename__ = 'annotation__location__auto__suggestions' __table_args__ = ( PrimaryKeyConstraint( 'subtask_id', @@ -19,7 +19,7 @@ class LocationIDSubtaskSuggestion( ) subtask_id = Column( Integer, - ForeignKey('auto_location_id_subtasks.id'), + ForeignKey('annotation__location__auto__subtasks.id'), nullable=False, primary_key=True, ) diff --git a/src/db/models/impl/annotation/location/user/__init__.py b/src/db/models/impl/annotation/location/user/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/location/user/pydantic.py b/src/db/models/impl/annotation/location/user/pydantic.py similarity index 70% rename from src/db/models/impl/url/suggestion/location/user/pydantic.py rename to src/db/models/impl/annotation/location/user/pydantic.py index 11f2218b..c3bdcf11 100644 --- a/src/db/models/impl/url/suggestion/location/user/pydantic.py +++ b/src/db/models/impl/annotation/location/user/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser from src.db.models.templates_.base import Base from src.db.templates.markers.bulk.insert import BulkInsertableModel @@ -13,4 +13,4 @@ class UserLocationSuggestionPydantic( @classmethod def sa_model(cls) -> type[Base]: """Defines the SQLAlchemy model.""" - return UserLocationSuggestion + return AnnotationLocationUser diff --git a/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py b/src/db/models/impl/annotation/location/user/sqlalchemy.py similarity index 88% rename 
from src/db/models/impl/url/suggestion/location/user/sqlalchemy.py rename to src/db/models/impl/annotation/location/user/sqlalchemy.py index 18ac3851..614912fd 100644 --- a/src/db/models/impl/url/suggestion/location/user/sqlalchemy.py +++ b/src/db/models/impl/annotation/location/user/sqlalchemy.py @@ -6,13 +6,13 @@ from src.db.models.templates_.base import Base -class UserLocationSuggestion( +class AnnotationLocationUser( Base, CreatedAtMixin, LocationDependentMixin, URLDependentMixin ): - __tablename__ = 'user_location_suggestions' + __tablename__ = 'annotation__location__user' __table_args__ = ( PrimaryKeyConstraint('url_id', 'location_id', 'user_id'), ) diff --git a/src/db/models/impl/annotation/name/__init__.py b/src/db/models/impl/annotation/name/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/name/anon/__init__.py b/src/db/models/impl/annotation/name/anon/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/name/anon/sqlalchemy.py b/src/db/models/impl/annotation/name/anon/sqlalchemy.py new file mode 100644 index 00000000..8e24a515 --- /dev/null +++ b/src/db/models/impl/annotation/name/anon/sqlalchemy.py @@ -0,0 +1,24 @@ +from sqlalchemy import PrimaryKeyConstraint, ForeignKey, Integer, Column + +from src.db.models.mixins import CreatedAtMixin, AnonymousSessionMixin +from src.db.models.templates_.base import Base + + +class AnnotationNameAnonEndorsement( + Base, + AnonymousSessionMixin, + CreatedAtMixin +): + __tablename__ = "annotation__name__anon__endorsements" + suggestion_id = Column( + Integer, + ForeignKey("annotation__name__suggestions.id"), + primary_key=True, + nullable=False, + ) + __table_args__ = ( + PrimaryKeyConstraint( + "session_id", + "suggestion_id" + ), + ) \ No newline at end of file diff --git a/src/db/models/impl/annotation/name/suggestion/__init__.py b/src/db/models/impl/annotation/name/suggestion/__init__.py new file mode 100644 index 
00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/name/enums.py b/src/db/models/impl/annotation/name/suggestion/enums.py similarity index 100% rename from src/db/models/impl/url/suggestion/name/enums.py rename to src/db/models/impl/annotation/name/suggestion/enums.py diff --git a/src/db/models/impl/annotation/name/suggestion/pydantic.py b/src/db/models/impl/annotation/name/suggestion/pydantic.py new file mode 100644 index 00000000..55423a0a --- /dev/null +++ b/src/db/models/impl/annotation/name/suggestion/pydantic.py @@ -0,0 +1,17 @@ +from pydantic import Field + +from src.db.models.impl.annotation.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.templates.markers.bulk.insert import BulkInsertableModel + + +class URLNameSuggestionPydantic(BulkInsertableModel): + + url_id: int + suggestion: str = Field(..., max_length=MAX_SUGGESTION_LENGTH) + source: NameSuggestionSource + + @classmethod + def sa_model(cls) -> type[AnnotationNameSuggestion]: + return AnnotationNameSuggestion \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/name/sqlalchemy.py b/src/db/models/impl/annotation/name/suggestion/sqlalchemy.py similarity index 65% rename from src/db/models/impl/url/suggestion/name/sqlalchemy.py rename to src/db/models/impl/annotation/name/suggestion/sqlalchemy.py index 2f11542d..5aeee478 100644 --- a/src/db/models/impl/url/suggestion/name/sqlalchemy.py +++ b/src/db/models/impl/annotation/name/suggestion/sqlalchemy.py @@ -2,19 +2,19 @@ from sqlalchemy.orm import Mapped from src.db.models.helpers import enum_column -from src.db.models.impl.url.suggestion.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource +from 
src.db.models.impl.annotation.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.mixins import URLDependentMixin, CreatedAtMixin from src.db.models.templates_.with_id import WithIDBase -class URLNameSuggestion( +class AnnotationNameSuggestion( WithIDBase, CreatedAtMixin, URLDependentMixin ): - __tablename__ = "url_name_suggestions" + __tablename__ = "annotation__name__suggestions" suggestion = Column(String(MAX_SUGGESTION_LENGTH), nullable=False) source: Mapped[NameSuggestionSource] = enum_column( diff --git a/src/db/models/impl/annotation/name/user/__init__.py b/src/db/models/impl/annotation/name/user/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/name/user/pydantic.py b/src/db/models/impl/annotation/name/user/pydantic.py new file mode 100644 index 00000000..fb662bcd --- /dev/null +++ b/src/db/models/impl/annotation/name/user/pydantic.py @@ -0,0 +1,12 @@ +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.templates.markers.bulk.insert import BulkInsertableModel + + +class LinkUserNameSuggestionPydantic(BulkInsertableModel): + + suggestion_id: int + user_id: int + + @classmethod + def sa_model(cls) -> type[AnnotationNameUserEndorsement]: + return AnnotationNameUserEndorsement \ No newline at end of file diff --git a/src/db/models/impl/link/user_name_suggestion/sqlalchemy.py b/src/db/models/impl/annotation/name/user/sqlalchemy.py similarity index 71% rename from src/db/models/impl/link/user_name_suggestion/sqlalchemy.py rename to src/db/models/impl/annotation/name/user/sqlalchemy.py index 316a8e3c..e456d026 100644 --- a/src/db/models/impl/link/user_name_suggestion/sqlalchemy.py +++ b/src/db/models/impl/annotation/name/user/sqlalchemy.py @@ -4,16 +4,16 @@ from src.db.models.templates_.base import Base -class LinkUserNameSuggestion( +class 
AnnotationNameUserEndorsement( Base, CreatedAtMixin, ): - __tablename__ = "link_user_name_suggestions" + __tablename__ = "annotation__name__user__endorsements" suggestion_id = Column( Integer, - ForeignKey("url_name_suggestions.id"), + ForeignKey("annotation__name__suggestions.id"), primary_key=True, nullable=False, ) diff --git a/src/db/models/impl/annotation/record_type/__init__.py b/src/db/models/impl/annotation/record_type/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/record_type/anon/__init__.py b/src/db/models/impl/annotation/record_type/anon/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py b/src/db/models/impl/annotation/record_type/anon/sqlalchemy.py similarity index 86% rename from src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py rename to src/db/models/impl/annotation/record_type/anon/sqlalchemy.py index 22f37839..7f7ac028 100644 --- a/src/db/models/impl/url/suggestion/anonymous/record_type/sqlalchemy.py +++ b/src/db/models/impl/annotation/record_type/anon/sqlalchemy.py @@ -7,13 +7,13 @@ from src.db.models.templates_.base import Base -class AnonymousAnnotationRecordType( +class AnnotationRecordTypeAnon( Base, URLDependentMixin, CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "anonymous_annotation_record_type" + __tablename__ = "annotation__record_type__anon" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "record_type"), ) diff --git a/src/db/models/impl/annotation/record_type/auto/__init__.py b/src/db/models/impl/annotation/record_type/auto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/record_type/auto.py b/src/db/models/impl/annotation/record_type/auto/sqlalchemy.py similarity index 89% rename from src/db/models/impl/url/suggestion/record_type/auto.py rename to 
src/db/models/impl/annotation/record_type/auto/sqlalchemy.py index 1c2c68d1..b09f01d8 100644 --- a/src/db/models/impl/url/suggestion/record_type/auto.py +++ b/src/db/models/impl/annotation/record_type/auto/sqlalchemy.py @@ -8,13 +8,13 @@ from src.db.models.types import record_type_values -class AutoRecordTypeSuggestion( +class AnnotationAutoRecordType( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, Base, ): - __tablename__ = "auto_record_type_suggestions" + __tablename__ = "annotation__record_type__auto" record_type = Column(postgresql.ENUM(*record_type_values, name='record_type'), nullable=False) __table_args__ = ( diff --git a/src/db/models/impl/annotation/record_type/user/__init__.py b/src/db/models/impl/annotation/record_type/user/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/record_type/user.py b/src/db/models/impl/annotation/record_type/user/user.py similarity index 90% rename from src/db/models/impl/url/suggestion/record_type/user.py rename to src/db/models/impl/annotation/record_type/user/user.py index 4e271225..c4a84b72 100644 --- a/src/db/models/impl/url/suggestion/record_type/user.py +++ b/src/db/models/impl/annotation/record_type/user/user.py @@ -8,13 +8,13 @@ from src.db.models.types import record_type_values -class UserRecordTypeSuggestion( +class AnnotationRecordTypeUser( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, Base, ): - __tablename__ = "user_record_type_suggestions" + __tablename__ = "annotation__record_type__user" __table_args__ = ( PrimaryKeyConstraint("url_id", "user_id"), ) diff --git a/src/db/models/impl/annotation/url_type/__init__.py b/src/db/models/impl/annotation/url_type/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/url_type/anon/__init__.py b/src/db/models/impl/annotation/url_type/anon/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py b/src/db/models/impl/annotation/url_type/anon/sqlalchemy.py similarity index 87% rename from src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py rename to src/db/models/impl/annotation/url_type/anon/sqlalchemy.py index f0cbc6a7..e8a8db18 100644 --- a/src/db/models/impl/url/suggestion/anonymous/url_type/sqlalchemy.py +++ b/src/db/models/impl/annotation/url_type/anon/sqlalchemy.py @@ -7,13 +7,13 @@ from src.db.models.templates_.base import Base -class AnonymousAnnotationURLType( +class AnnotationURLTypeAnon( Base, URLDependentMixin, CreatedAtMixin, AnonymousSessionMixin ): - __tablename__ = "anonymous_annotation_url_type" + __tablename__ = "annotation__url_type__anon" __table_args__ = ( PrimaryKeyConstraint("session_id", "url_id", "url_type"), ) diff --git a/src/db/models/impl/annotation/url_type/auto/__init__.py b/src/db/models/impl/annotation/url_type/auto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/annotation/url_type/auto/pydantic/__init__.py b/src/db/models/impl/annotation/url_type/auto/pydantic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/url_type/auto/pydantic/input.py b/src/db/models/impl/annotation/url_type/auto/pydantic/input.py similarity index 100% rename from src/db/models/impl/url/suggestion/url_type/auto/pydantic/input.py rename to src/db/models/impl/annotation/url_type/auto/pydantic/input.py diff --git a/src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py b/src/db/models/impl/annotation/url_type/auto/sqlalchemy.py similarity index 82% rename from src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py rename to src/db/models/impl/annotation/url_type/auto/sqlalchemy.py index 19b5dc09..d882f667 100644 --- a/src/db/models/impl/url/suggestion/url_type/auto/sqlalchemy.py +++ b/src/db/models/impl/annotation/url_type/auto/sqlalchemy.py @@ -6,13 +6,13 @@ 
from src.db.models.templates_.with_id import WithIDBase -class AutoRelevantSuggestion( +class AnnotationAutoURLType( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, Base, ): - __tablename__ = "auto_relevant_suggestions" + __tablename__ = "annotation__url_type__auto" relevant = Column(Boolean, nullable=True) confidence = Column(Float, nullable=True) @@ -25,4 +25,4 @@ class AutoRelevantSuggestion( # Relationships - url = relationship("URL", back_populates="auto_relevant_suggestion") + url = relationship("URL") diff --git a/src/db/models/impl/annotation/url_type/user/__init__.py b/src/db/models/impl/annotation/url_type/user/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/url_type/user.py b/src/db/models/impl/annotation/url_type/user/sqlalchemy.py similarity index 84% rename from src/db/models/impl/url/suggestion/url_type/user.py rename to src/db/models/impl/annotation/url_type/user/sqlalchemy.py index 52bbc4eb..1d71483a 100644 --- a/src/db/models/impl/url/suggestion/url_type/user.py +++ b/src/db/models/impl/annotation/url_type/user/sqlalchemy.py @@ -9,13 +9,13 @@ from src.db.models.templates_.with_id import WithIDBase -class UserURLTypeSuggestion( +class AnnotationURLTypeUser( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, Base, ): - __tablename__ = "user_url_type_suggestions" + __tablename__ = "annotation__url_type__user" __table_args__ = ( PrimaryKeyConstraint("url_id", "user_id"), ) @@ -29,4 +29,4 @@ class UserURLTypeSuggestion( # Relationships - url = relationship("URL", back_populates="user_relevant_suggestions") + url = relationship("URL") diff --git a/src/db/models/impl/anon_session/__init__.py b/src/db/models/impl/anon_session/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/suggestion/anonymous/session/sqlalchemy.py b/src/db/models/impl/anon_session/sqlalchemy.py similarity index 100% rename from 
src/db/models/impl/url/suggestion/anonymous/session/sqlalchemy.py rename to src/db/models/impl/anon_session/sqlalchemy.py diff --git a/src/db/models/impl/link/location__user_follow.py b/src/db/models/impl/link/location__user_follow.py new file mode 100644 index 00000000..a4f65281 --- /dev/null +++ b/src/db/models/impl/link/location__user_follow.py @@ -0,0 +1,20 @@ +from sqlalchemy import Integer, Column, PrimaryKeyConstraint + +from src.db.models.mixins import LocationDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base + + +class LinkLocationUserFollow( + Base, + LocationDependentMixin, + CreatedAtMixin +): + __tablename__ = "link__locations__user_follows" + __table_args__ = ( + PrimaryKeyConstraint( + "user_id", + "location_id" + ), + ) + + user_id = Column(Integer, nullable=False) diff --git a/src/db/models/impl/link/user_name_suggestion/pydantic.py b/src/db/models/impl/link/user_name_suggestion/pydantic.py deleted file mode 100644 index 6e07989b..00000000 --- a/src/db/models/impl/link/user_name_suggestion/pydantic.py +++ /dev/null @@ -1,12 +0,0 @@ -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.templates.markers.bulk.insert import BulkInsertableModel - - -class LinkUserNameSuggestionPydantic(BulkInsertableModel): - - suggestion_id: int - user_id: int - - @classmethod - def sa_model(cls) -> type[LinkUserNameSuggestion]: - return LinkUserNameSuggestion \ No newline at end of file diff --git a/src/db/models/impl/proposals/__init__.py b/src/db/models/impl/proposals/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/proposals/agency_/__init__.py b/src/db/models/impl/proposals/agency_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/proposals/agency_/core.py b/src/db/models/impl/proposals/agency_/core.py new file mode 100644 index 00000000..69172768 --- /dev/null +++ 
b/src/db/models/impl/proposals/agency_/core.py @@ -0,0 +1,38 @@ +from sqlalchemy import Column, String, Integer, ForeignKey +from sqlalchemy.orm import Mapped, relationship + +from src.db.models.helpers import enum_column +from src.db.models.impl.agency.enums import JurisdictionType, AgencyType +from src.db.models.impl.proposals.enums import ProposalStatus +from src.db.models.mixins import CreatedAtMixin +from src.db.models.templates_.with_id import WithIDBase + + +class ProposalAgency( + WithIDBase, + CreatedAtMixin +): + + __tablename__ = "proposal__agencies" + + name = Column(String, nullable=False) + agency_type: Mapped[AgencyType] = enum_column(AgencyType, name="agency_type_enum") + jurisdiction_type: Mapped[JurisdictionType] = enum_column( + JurisdictionType, + name="jurisdiction_type_enum", + nullable=False, + ) + proposing_user_id: Mapped[int | None] = Column(Integer, nullable=True) + proposal_status: Mapped[ProposalStatus] = enum_column(ProposalStatus, name="proposal_status_enum") + promoted_agency_id: Mapped[int | None] = Column( + Integer, + ForeignKey("agencies.id"), + nullable=True + ) + + locations = relationship( + "LocationExpandedView", + primaryjoin="ProposalAgency.id == ProposalLinkAgencyLocation.proposal_agency_id", + secondaryjoin="LocationExpandedView.id == ProposalLinkAgencyLocation.location_id", + secondary="proposal__link__agencies__locations", + ) diff --git a/src/db/models/impl/proposals/agency_/decision_info.py b/src/db/models/impl/proposals/agency_/decision_info.py new file mode 100644 index 00000000..5cc19dd0 --- /dev/null +++ b/src/db/models/impl/proposals/agency_/decision_info.py @@ -0,0 +1,27 @@ +""" +Provides decision information on an Agency + +""" +from sqlalchemy import Column, Integer, String, ForeignKey, PrimaryKeyConstraint +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import CreatedAtMixin +from src.db.models.templates_.base import Base + + +class ProposalAgencyDecisionInfo( + Base, + CreatedAtMixin, +): + 
__tablename__ = "proposal__agencies__decision_info" + __table_args__ = ( + PrimaryKeyConstraint("proposal_agency_id"), + ) + + proposal_agency_id: Mapped[int] = Column( + Integer, + ForeignKey("proposal__agencies.id"), + nullable=False + ) + deciding_user_id: Mapped[int] = Column(Integer) + rejection_reason: Mapped[str | None] = Column(String, nullable=True) diff --git a/src/db/models/impl/proposals/agency_/link__location.py b/src/db/models/impl/proposals/agency_/link__location.py new file mode 100644 index 00000000..43d7c9fd --- /dev/null +++ b/src/db/models/impl/proposals/agency_/link__location.py @@ -0,0 +1,22 @@ +from sqlalchemy import PrimaryKeyConstraint, Column, ForeignKey, Integer +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import LocationDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base + + +class ProposalLinkAgencyLocation( + Base, + LocationDependentMixin, + CreatedAtMixin +): + __tablename__ = "proposal__link__agencies__locations" + __table_args__ = ( + PrimaryKeyConstraint("proposal_agency_id", "location_id"), + ) + + proposal_agency_id: Mapped[int] = Column( + Integer, + ForeignKey("proposal__agencies.id"), + nullable=False + ) \ No newline at end of file diff --git a/src/db/models/impl/proposals/enums.py b/src/db/models/impl/proposals/enums.py new file mode 100644 index 00000000..defd0d8c --- /dev/null +++ b/src/db/models/impl/proposals/enums.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class ProposalStatus(Enum): + PENDING = "pending" + APPROVED = "approved" + REJECTED = "rejected" \ No newline at end of file diff --git a/src/db/models/impl/url/core/pydantic/info.py b/src/db/models/impl/url/core/pydantic/info.py index 0985b3fc..74082427 100644 --- a/src/db/models/impl/url/core/pydantic/info.py +++ b/src/db/models/impl/url/core/pydantic/info.py @@ -1,9 +1,7 @@ import datetime -from typing import Optional from pydantic import BaseModel -from src.collectors.enums import URLStatus from 
src.db.models.impl.url.core.enums import URLSource @@ -12,7 +10,6 @@ class URLInfo(BaseModel): batch_id: int | None= None url: str collector_metadata: dict | None = None - status: URLStatus = URLStatus.OK updated_at: datetime.datetime | None = None created_at: datetime.datetime | None = None name: str | None = None diff --git a/src/db/models/impl/url/core/pydantic/insert.py b/src/db/models/impl/url/core/pydantic/insert.py index ed73b6c1..643cab15 100644 --- a/src/db/models/impl/url/core/pydantic/insert.py +++ b/src/db/models/impl/url/core/pydantic/insert.py @@ -1,5 +1,3 @@ -from src.collectors.enums import URLStatus -from src.core.enums import RecordType from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.templates_.base import Base @@ -17,6 +15,5 @@ def sa_model(cls) -> type[Base]: scheme: str | None = None collector_metadata: dict | None = None name: str | None = None - status: URLStatus = URLStatus.OK source: URLSource trailing_slash: bool \ No newline at end of file diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index de4af177..b9eedc5c 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -1,18 +1,26 @@ from sqlalchemy import Column, Text, String, JSON, case, literal, Boolean from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.orm import relationship, Mapped -from sqlalchemy.util import hybridproperty -from src.collectors.enums import URLStatus from src.db.models.helpers import enum_column +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from 
src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.mixins import UpdatedAtMixin, CreatedAtMixin from src.db.models.templates_.with_id import WithIDBase @@ -29,11 +37,6 @@ class URL(UpdatedAtMixin, CreatedAtMixin, WithIDBase): # The metadata from the collector collector_metadata = Column(JSON) # The outcome of the URL: submitted, human_labeling, rejected, duplicate, etc. 
- status: Mapped[URLStatus] = enum_column( - URLStatus, - name='url_status', - nullable=False - ) trailing_slash = Column(Boolean, nullable=False) @hybrid_property @@ -90,35 +93,42 @@ def full_url(cls): name_suggestions = relationship( - URLNameSuggestion + AnnotationNameSuggestion ) # Location user_location_suggestions = relationship( - UserLocationSuggestion + AnnotationLocationUser ) user_location_suggestion_not_found = relationship( LinkUserSuggestionLocationNotFound ) auto_location_subtasks = relationship( - AutoLocationIDSubtask + AnnotationLocationAutoSubtask ) + anon_location_suggestions = relationship( + AnnotationLocationAnon) # Agency user_agency_suggestions = relationship( - "UserURLAgencySuggestion", back_populates="url") + AnnotationAgencyUser, back_populates="url") auto_agency_subtasks = relationship( - "URLAutoAgencyIDSubtask" - ) + AnnotationAgencyAutoSubtask) + anon_agency_suggestions = relationship( + AnnotationAgencyAnon) # Record Type auto_record_type_suggestion = relationship( - "AutoRecordTypeSuggestion", uselist=False, back_populates="url") + AnnotationAutoRecordType, uselist=False, back_populates="url") user_record_type_suggestions = relationship( - "UserRecordTypeSuggestion", back_populates="url") + AnnotationRecordTypeUser, back_populates="url") + anon_record_type_suggestions = relationship( + AnnotationRecordTypeAnon) # Relvant/URL Type - auto_relevant_suggestion = relationship( - "AutoRelevantSuggestion", uselist=False, back_populates="url") - user_relevant_suggestions = relationship( - "UserURLTypeSuggestion", back_populates="url") + auto_url_type_suggestions = relationship( + AnnotationAutoURLType, uselist=False, back_populates="url") + user_url_type_suggestions = relationship( + AnnotationURLTypeUser, back_populates="url") + anon_url_type_suggestions = relationship( + AnnotationURLTypeAnon) reviewing_user = relationship( "ReviewingUserURL", uselist=False, back_populates="url") diff --git 
a/src/db/models/impl/url/suggestion/anonymous/__init__.py b/src/db/models/impl/url/suggestion/anonymous/__init__.py deleted file mode 100644 index fddc715f..00000000 --- a/src/db/models/impl/url/suggestion/anonymous/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from src.db.models.impl.url.suggestion.anonymous.session.sqlalchemy import AnonymousSession \ No newline at end of file diff --git a/src/db/models/impl/url/suggestion/name/pydantic.py b/src/db/models/impl/url/suggestion/name/pydantic.py deleted file mode 100644 index 244e02c2..00000000 --- a/src/db/models/impl/url/suggestion/name/pydantic.py +++ /dev/null @@ -1,17 +0,0 @@ -from pydantic import Field - -from src.db.models.impl.url.suggestion.location.auto.subtask.constants import MAX_SUGGESTION_LENGTH -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.templates.markers.bulk.insert import BulkInsertableModel - - -class URLNameSuggestionPydantic(BulkInsertableModel): - - url_id: int - suggestion: str = Field(..., max_length=MAX_SUGGESTION_LENGTH) - source: NameSuggestionSource - - @classmethod - def sa_model(cls) -> type[URLNameSuggestion]: - return URLNameSuggestion \ No newline at end of file diff --git a/src/db/models/materialized_views/batch_url_status/__init__.py b/src/db/models/materialized_views/batch_url_status/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/views/batch_url_status/core.py b/src/db/models/materialized_views/batch_url_status/core.py similarity index 98% rename from src/db/models/views/batch_url_status/core.py rename to src/db/models/materialized_views/batch_url_status/core.py index 1ec0711d..12d2872e 100644 --- a/src/db/models/views/batch_url_status/core.py +++ b/src/db/models/materialized_views/batch_url_status/core.py @@ -66,7 +66,7 @@ from src.db.models.templates_.base import Base -class BatchURLStatusMatView( +class 
BatchURLStatusMaterializedView( Base, ViewMixin, BatchDependentMixin diff --git a/src/db/models/views/batch_url_status/enums.py b/src/db/models/materialized_views/batch_url_status/enums.py similarity index 81% rename from src/db/models/views/batch_url_status/enums.py rename to src/db/models/materialized_views/batch_url_status/enums.py index 2f524de4..2ce74325 100644 --- a/src/db/models/views/batch_url_status/enums.py +++ b/src/db/models/materialized_views/batch_url_status/enums.py @@ -1,7 +1,7 @@ from enum import Enum -class BatchURLStatusEnum(Enum): +class BatchURLStatusViewEnum(Enum): ERROR = "Error" NO_URLS = "No URLs" LABELING_COMPLETE = "Labeling Complete" diff --git a/src/db/models/materialized_views/url_status/__init__.py b/src/db/models/materialized_views/url_status/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/views/url_status/enums.py b/src/db/models/materialized_views/url_status/enums.py similarity index 100% rename from src/db/models/views/url_status/enums.py rename to src/db/models/materialized_views/url_status/enums.py diff --git a/src/db/models/materialized_views/url_status/sqlalchemy.py b/src/db/models/materialized_views/url_status/sqlalchemy.py new file mode 100644 index 00000000..fe6c2466 --- /dev/null +++ b/src/db/models/materialized_views/url_status/sqlalchemy.py @@ -0,0 +1,15 @@ +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import URLDependentViewMixin +from src.db.models.templates_.base import Base + + +class URLStatusMaterializedView( + Base, + URLDependentViewMixin +): + + __tablename__ = "url_status_mat_view" + + status: Mapped[str] + code: Mapped[int] \ No newline at end of file diff --git a/src/db/models/mixins.py b/src/db/models/mixins.py index 640ec955..4a8ae48f 100644 --- a/src/db/models/mixins.py +++ b/src/db/models/mixins.py @@ -1,6 +1,7 @@ from typing import ClassVar from sqlalchemy import Column, Integer, ForeignKey, TIMESTAMP, event +from sqlalchemy.orm import Mapped from 
src.db.models.exceptions import WriteToViewError from src.db.models.helpers import get_created_at_column, CURRENT_TIME_SERVER_DEFAULT, url_id_primary_key_constraint, \ @@ -41,7 +42,7 @@ class BatchDependentMixin: ) class LocationDependentMixin: - location_id = Column( + location_id: Mapped[int] = Column( Integer, ForeignKey( 'locations.id', diff --git a/src/db/models/views/url_anno_count.py b/src/db/models/views/url_anno_count.py index 2e910afb..139b0bac 100644 --- a/src/db/models/views/url_anno_count.py +++ b/src/db/models/views/url_anno_count.py @@ -5,7 +5,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.auto_location_id_subtasks anno on u.id = anno.url_id + inner join public.annotation__auto__location__subtasks anno on u.id = anno.url_id group by u.id ) , auto_agency_count as ( @@ -13,7 +13,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.url_auto_agency_id_subtasks anno on u.id = anno.url_id + inner join public.annotation__auto__agency__subtasks anno on u.id = anno.url_id group by u.id ) , auto_url_type_count as ( @@ -21,7 +21,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.auto_relevant_suggestions anno on u.id = anno.url_id + inner join public.annotation__auto__url_type anno on u.id = anno.url_id group by u.id ) , auto_record_type_count as ( @@ -29,7 +29,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.auto_record_type_suggestions anno on u.id = anno.url_id + inner join public.annotation__auto__record_type anno on u.id = anno.url_id group by u.id ) , user_location_count as ( @@ -37,7 +37,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.user_location_suggestions anno on u.id = anno.url_id + inner join public.annotation__user__location anno on u.id = anno.url_id group by u.id ) , user_agency_count as ( @@ -45,7 +45,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.user_url_agency_suggestions anno on u.id = anno.url_id + inner join 
public.annotation__user__agency anno on u.id = anno.url_id group by u.id ) , user_url_type_count as ( @@ -53,7 +53,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.user_url_type_suggestions anno on u.id = anno.url_id + inner join public.annotation__user__url_type anno on u.id = anno.url_id group by u.id ) , user_record_type_count as ( @@ -61,7 +61,7 @@ u.id, count(anno.url_id) as cnt from urls u - inner join public.user_record_type_suggestions anno on u.id = anno.url_id + inner join public.annotation__user__record_type anno on u.id = anno.url_id group by u.id ) select @@ -117,4 +117,8 @@ class URLAnnotationCount( user_location_count = Column(Integer, nullable=False) user_record_type_count = Column(Integer, nullable=False) user_url_type_count = Column(Integer, nullable=False) - total_anno_count = Column(Integer, nullable=False) \ No newline at end of file + anon_agency_count = Column(Integer, nullable=False) + anon_location_count = Column(Integer, nullable=False) + anon_record_type_count = Column(Integer, nullable=False) + anon_url_type_count = Column(Integer, nullable=False) + total_anno_count = Column(Integer, nullable=False) diff --git a/src/db/models/views/url_annotations_flags.py b/src/db/models/views/url_annotations_flags.py index c133fbfc..b194a5e0 100644 --- a/src/db/models/views/url_annotations_flags.py +++ b/src/db/models/views/url_annotations_flags.py @@ -11,12 +11,12 @@ CASE WHEN cua.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS has_confirmed_agency, CASE WHEN ruu.url_id IS NOT NULL THEN TRUE ELSE FALSE END AS was_reviewed FROM urls u - LEFT JOIN public.auto_record_type_suggestions arts ON u.id = arts.url_id - LEFT JOIN public.auto_relevant_suggestions ars ON u.id = ars.url_id + LEFT JOIN public.annotation__auto__record_type arts ON u.id = arts.url_id + LEFT JOIN public.annotation__auto__url_type ars ON u.id = ars.url_id LEFT JOIN public.{URL_AUTO_AGENCY_SUGGESTIONS_TABLE_NAME} auas ON u.id = auas.url_id - LEFT JOIN 
public.user_record_type_suggestions urts ON u.id = urts.url_id - LEFT JOIN public.user_relevant_suggestions urs ON u.id = urs.url_id - LEFT JOIN public.user_url_agency_suggestions uuas ON u.id = uuas.url_id + LEFT JOIN public.annotation__user__record_type urts ON u.id = urts.url_id + LEFT JOIN public.annotation__user__url_type urs ON u.id = urs.url_id + LEFT JOIN public.annotation__user__agency uuas ON u.id = uuas.url_id LEFT JOIN public.reviewing_user_url ruu ON u.id = ruu.url_id LEFT JOIN public.link_agencies__urls cua on u.id = cua.url_id ) diff --git a/src/db/models/views/url_status/core.py b/src/db/models/views/url_status/core.py deleted file mode 100644 index be771fe5..00000000 --- a/src/db/models/views/url_status/core.py +++ /dev/null @@ -1,72 +0,0 @@ -""" - CREATE MATERIALIZED VIEW url_status_mat_view AS - with - urls_with_relevant_errors as ( - select - ute.url_id - from - url_task_error ute - where - ute.task_type in ( - 'Screenshot', - 'HTML', - 'URL Probe' - ) - ) - select - u.id as url_id, - case - when ( - -- Validated as not relevant, individual record, or not found - fuv.type in ('not relevant', 'individual record', 'not found') - -- Has Meta URL in data sources app - OR udmu.url_id is not null - -- Has data source in data sources app - OR uds.url_id is not null - ) Then 'Submitted/Pipeline Complete' - when fuv.type is not null THEN 'Accepted' - when ( - -- Has compressed HTML - uch.url_id is not null - AND - -- Has web metadata - uwm.url_id is not null - AND - -- Has screenshot - us.url_id is not null - ) THEN 'Community Labeling' - when uwre.url_id is not null then 'Error' - ELSE 'Intake' - END as status - - from - urls u - left join urls_with_relevant_errors uwre - on u.id = uwre.url_id - left join url_screenshot us - on u.id = us.url_id - left join url_compressed_html uch - on u.id = uch.url_id - left join url_web_metadata uwm - on u.id = uwm.url_id - left join flag_url_validated fuv - on u.id = fuv.url_id - left join url_ds_meta_url udmu - on 
u.id = udmu.url_id - left join url_data_source uds - on u.id = uds.url_id -""" -from sqlalchemy import String, Column - -from src.db.models.helpers import url_id_primary_key_constraint -from src.db.models.mixins import ViewMixin, URLDependentMixin, URLDependentViewMixin -from src.db.models.templates_.base import Base - - -class URLStatusMatView( - Base, - URLDependentViewMixin -): - __tablename__ = "url_status_mat_view" - - status = Column(String) \ No newline at end of file diff --git a/src/db/queries/implementations/anonymous_session.py b/src/db/queries/implementations/anonymous_session.py index 0ff00ea3..a2fbf346 100644 --- a/src/db/queries/implementations/anonymous_session.py +++ b/src/db/queries/implementations/anonymous_session.py @@ -2,7 +2,7 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.db.models.impl.url.suggestion.anonymous import AnonymousSession +from src.db.models.impl.anon_session.sqlalchemy import AnonymousSession from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/db/queries/implementations/core/common/annotation_exists_/constants.py b/src/db/queries/implementations/core/common/annotation_exists_/constants.py index 190291ef..dbdfaa1b 100644 --- a/src/db/queries/implementations/core/common/annotation_exists_/constants.py +++ b/src/db/queries/implementations/core/common/annotation_exists_/constants.py @@ -1,15 +1,15 @@ -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy 
import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser ALL_ANNOTATION_MODELS = [ - AutoRecordTypeSuggestion, - AutoRelevantSuggestion, - URLAutoAgencyIDSubtask, - UserURLTypeSuggestion, - UserRecordTypeSuggestion, - UserURLAgencySuggestion + AnnotationAutoRecordType, + AnnotationAutoURLType, + AnnotationAgencyAutoSubtask, + AnnotationURLTypeUser, + AnnotationRecordTypeUser, + AnnotationAgencyUser ] diff --git a/src/db/queries/implementations/core/common/annotation_exists_/core.py b/src/db/queries/implementations/core/common/annotation_exists_/core.py index 53e8bcf6..4c7328a2 100644 --- a/src/db/queries/implementations/core/common/annotation_exists_/core.py +++ b/src/db/queries/implementations/core/common/annotation_exists_/core.py @@ -16,12 +16,11 @@ from sqlalchemy import case, func, Select, select -from src.collectors.enums import URLStatus -from src.db.queries.implementations.core.common.annotation_exists_.constants import ALL_ANNOTATION_MODELS from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase +from src.db.queries.implementations.core.common.annotation_exists_.constants import ALL_ANNOTATION_MODELS class AnnotationExistsCTEQueryBuilder(QueryBuilderBase): @@ -73,7 +72,6 @@ async def build(self) -> Any: FlagURLValidated.url_id == URL.id ) anno_exists_query = anno_exists_query.where( - URL.status == URLStatus.OK.value, FlagURLValidated.url_id.is_(None) 
) anno_exists_query = anno_exists_query.group_by(URL.id).cte("annotations_exist") diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py index 5de2eb55..f5696e7e 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/builder.py @@ -5,10 +5,9 @@ from src.api.endpoints.batch.dtos.get.summaries.counts import BatchSummaryURLCounts from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary from src.collectors.enums import CollectorType -from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.views.batch_url_status.core import BatchURLStatusMatView -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.core import BatchURLStatusMaterializedView +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.builder import URLCountsCTEQueryBuilder from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.labels import URLCountsLabels @@ -20,7 +19,7 @@ def __init__( self, page: int = 1, collector_type: CollectorType | None = None, - status: BatchURLStatusEnum | None = None, + status: BatchURLStatusViewEnum | None = None, batch_id: int | None = None, ): super().__init__() @@ -41,7 +40,7 @@ async def run(self, session: AsyncSession) -> list[BatchSummary]: *builder.get_all(), Batch.strategy, Batch.status, - BatchURLStatusMatView.batch_url_status, + BatchURLStatusMaterializedView.batch_url_status, Batch.parameters, Batch.user_id, Batch.compute_time, @@ -50,8 +49,8 @@ async def run(self, session: AsyncSession) -> list[BatchSummary]: 
builder.query, builder.get(count_labels.batch_id) == Batch.id, ).outerjoin( - BatchURLStatusMatView, - BatchURLStatusMatView.batch_id == Batch.id, + BatchURLStatusMaterializedView, + BatchURLStatusMaterializedView.batch_id == Batch.id, ).order_by( Batch.id.asc() ) @@ -75,7 +74,6 @@ async def run(self, session: AsyncSession) -> list[BatchSummary]: date_generated=row.date_generated, url_counts=BatchSummaryURLCounts( total=row[count_labels.total], - duplicate=row[count_labels.duplicate], not_relevant=row[count_labels.not_relevant], submitted=row[count_labels.submitted], errored=row[count_labels.error], diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py index 27240b7d..7192f1fa 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py @@ -1,20 +1,13 @@ -from sqlalchemy import Select, case, Label, and_, exists -from sqlalchemy.sql.functions import count, coalesce, func +from sqlalchemy import Select +from sqlalchemy.sql.functions import func -from src.collectors.enums import URLStatus, CollectorType -from src.core.enums import BatchStatus -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL +from src.collectors.enums import CollectorType from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource -from src.db.models.views.batch_url_status.core import BatchURLStatusMatView -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.core import 
BatchURLStatusMaterializedView +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.helpers import add_page_offset from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.all import ALL_CTE -from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.duplicate import DUPLICATE_CTE from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.error import ERROR_CTE from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.not_relevant import NOT_RELEVANT_CTE from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte.pending import PENDING_CTE @@ -28,7 +21,7 @@ def __init__( self, page: int = 1, collector_type: CollectorType | None = None, - status: BatchURLStatusEnum | None = None, + status: BatchURLStatusViewEnum | None = None, batch_id: int | None = None ): super().__init__(URLCountsLabels()) @@ -43,7 +36,6 @@ def get_core_query(self): query = ( Select( Batch.id.label(labels.batch_id), - func.coalesce(DUPLICATE_CTE.count, 0).label(labels.duplicate), func.coalesce(SUBMITTED_CTE.count, 0).label(labels.submitted), func.coalesce(PENDING_CTE.count, 0).label(labels.pending), func.coalesce(ALL_CTE.count, 0).label(labels.total), @@ -52,11 +44,11 @@ def get_core_query(self): ) .select_from(Batch) .join( - BatchURLStatusMatView, - BatchURLStatusMatView.batch_id == Batch.id, + BatchURLStatusMaterializedView, + BatchURLStatusMaterializedView.batch_id == Batch.id, ) ) - for cte in [DUPLICATE_CTE, SUBMITTED_CTE, PENDING_CTE, ALL_CTE, NOT_RELEVANT_CTE, ERROR_CTE]: + for cte in [SUBMITTED_CTE, PENDING_CTE, ALL_CTE, NOT_RELEVANT_CTE, ERROR_CTE]: query = query.outerjoin( cte.cte, Batch.id == cte.batch_id @@ -86,4 +78,4 @@ def apply_collector_type_filter(self, query: Select): def apply_status_filter(self, query: Select): if self.status is None: return 
query - return query.where(BatchURLStatusMatView.batch_url_status == self.status.value) + return query.where(BatchURLStatusMaterializedView.batch_url_status == self.status.value) diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py deleted file mode 100644 index 906dd49c..00000000 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/duplicate.py +++ /dev/null @@ -1,29 +0,0 @@ -from sqlalchemy import select, func - -from src.collectors.enums import URLStatus -from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ - URLCountsCTEContainer - -DUPLICATE_CTE = URLCountsCTEContainer( - select( - Batch.id, - func.count(URL.id).label("duplicate_count") - ) - .join( - LinkBatchURL, - LinkBatchURL.batch_id == Batch.id, - ) - .join( - URL, - URL.id == LinkBatchURL.url_id, - ) - .where( - URL.status == URLStatus.DUPLICATE - ) - .group_by( - Batch.id - ).cte("duplicate_count") -) \ No newline at end of file diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py index 953a5c0d..2109588b 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/error.py @@ -1,6 +1,5 @@ from sqlalchemy import select, func -from src.collectors.enums import URLStatus from src.db.helpers.query import exists_url from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL diff --git 
a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/labels.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/labels.py index c55d8f45..72806c13 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/labels.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/labels.py @@ -11,6 +11,5 @@ class URLCountsLabels(LabelsBase): submitted: str = "count_submitted" not_relevant: str = "count_not_relevant" error: str = "count_error" - duplicate: str = "count_duplicate" diff --git a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py index d609e2b3..e95726bf 100644 --- a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py +++ b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py @@ -4,28 +4,28 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.metrics.dtos.get.urls.aggregated.pending import GetMetricsURLsAggregatedPendingResponseDTO -from src.collectors.enums import URLStatus +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.common.annotation_exists_.core import AnnotationExistsCTEQueryBuilder + class 
PendingAnnotationExistsCTEQueryBuilder(AnnotationExistsCTEQueryBuilder): @property def has_user_relevant_annotation(self): - return self.get_exists_for_model(UserURLTypeSuggestion) + return self.get_exists_for_model(AnnotationURLTypeUser) @property def has_user_record_type_annotation(self): - return self.get_exists_for_model(UserRecordTypeSuggestion) + return self.get_exists_for_model(AnnotationRecordTypeUser) @property def has_user_agency_annotation(self): - return self.get_exists_for_model(UserURLAgencySuggestion) + return self.get_exists_for_model(AnnotationAgencyUser) def get_exists_for_model(self, model: Type[URLDependentMixin]): return self.query.c[ @@ -43,9 +43,7 @@ async def build(self) -> Any: URL, URL.id == self.url_id ) - .where( - URL.status == URLStatus.OK.value - ).cte("pending") + .cte("pending") ) diff --git a/src/db/statement_composer.py b/src/db/statement_composer.py index faa965a8..d3e90b8b 100644 --- a/src/db/statement_composer.py +++ b/src/db/statement_composer.py @@ -1,20 +1,9 @@ -from http import HTTPStatus -from typing import Any +from sqlalchemy import Select, select, exists, func, Subquery, not_, ColumnElement -from sqlalchemy import Select, select, exists, func, Subquery, and_, not_, ColumnElement -from sqlalchemy.orm import selectinload - -from src.collectors.enums import URLStatus -from src.db.enums import TaskType from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.link.task_url import LinkTaskURL -from src.db.models.impl.task.core import Task -from src.db.models.impl.task.enums import TaskStatus from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.types import UserSuggestionType @@ 
-23,46 +12,6 @@ class StatementComposer: Assists in the composition of SQLAlchemy statements """ - @staticmethod - def has_non_errored_urls_without_html_data() -> Select: - exclude_subquery = ( - select(1). - select_from(LinkTaskURL). - join(Task, LinkTaskURL.task_id == Task.id). - where(LinkTaskURL.url_id == URL.id). - where(Task.task_type == TaskType.HTML.value). - where(Task.task_status == TaskStatus.COMPLETE.value) - ) - query = ( - select(URL) - .join(URLWebMetadata) - .outerjoin(URLScrapeInfo) - .where( - URLScrapeInfo.url_id == None, - ~exists(exclude_subquery), - URLWebMetadata.status_code == HTTPStatus.OK.value, - URLWebMetadata.content_type.like("%html%"), - ) - .options( - selectinload(URL.batch) - ) - ) - return query - - @staticmethod - def exclude_urls_with_extant_model( - statement: Select, - model: Any - ): - return (statement.where( - ~exists( - select(model.id). - where( - model.url_id == URL.id - ) - ) - )) - @staticmethod def simple_count_subquery(model, attribute: str, label: str) -> Subquery: attr_value = getattr(model, attribute) @@ -74,12 +23,9 @@ def simple_count_subquery(model, attribute: str, label: str) -> Subquery: @staticmethod def pending_urls_missing_miscellaneous_metadata_query() -> Select: query = select(URL).where( - and_( - URL.status == URLStatus.OK.value, URL.name == None, URL.description == None, URLOptionalDataSourceMetadata.url_id == None - ) ).outerjoin( URLOptionalDataSourceMetadata ).join( diff --git a/src/db/types.py b/src/db/types.py index c224a36c..df065cab 100644 --- a/src/db/types.py +++ b/src/db/types.py @@ -1,10 +1,10 @@ from typing import TypeVar -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from 
src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.queries.base.labels import LabelsBase -UserSuggestionType = UserURLAgencySuggestion | UserURLTypeSuggestion | UserRecordTypeSuggestion +UserSuggestionType = AnnotationAgencyUser | AnnotationURLTypeUser | AnnotationRecordTypeUser LabelsType = TypeVar("LabelsType", bound=LabelsBase) \ No newline at end of file diff --git a/src/external/pdap/_templates/request_builder.py b/src/external/pdap/_templates/request_builder.py index 2cde6c51..d944efdf 100644 --- a/src/external/pdap/_templates/request_builder.py +++ b/src/external/pdap/_templates/request_builder.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from http import HTTPStatus -from typing import Any +from typing import Any, TypeVar from pdap_access_manager.access_manager.async_ import AccessManagerAsync from pdap_access_manager.enums import RequestType @@ -8,6 +8,7 @@ from pdap_access_manager.models.response import ResponseInfo from pydantic import BaseModel +T = TypeVar("T", bound=BaseModel) class PDAPRequestBuilderBase(ABC): @@ -37,6 +38,38 @@ async def post( raise Exception(f"Failed to make request to PDAP: {response_info.data}") return response_info.data + async def post_v2( + self, + url: str, + request_model: BaseModel, + return_model_type: type[T] + ) -> T: + request_info = RequestInfo( + type_=RequestType.POST, + url=url, + json_=request_model.model_dump(mode='json'), + headers=await self.access_manager.jwt_header() + ) + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + if response_info.status_code != HTTPStatus.OK: + raise Exception(f"Failed to make request to PDAP: {response_info.data}") + return return_model_type(**response_info.data) + + async def get( + self, + url: str, + return_model_type: type[T] + ) -> T: + request_info = RequestInfo( + type_=RequestType.GET, + url=url, + 
headers=await self.access_manager.jwt_header() + ) + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + if response_info.status_code != HTTPStatus.OK: + raise Exception(f"Failed to make request to PDAP: {response_info.data}") + return return_model_type(**response_info.data) + @abstractmethod async def inner_logic(self) -> Any: raise NotImplementedError diff --git a/src/external/pdap/client.py b/src/external/pdap/client.py index 38c67e08..d3cb1209 100644 --- a/src/external/pdap/client.py +++ b/src/external/pdap/client.py @@ -22,26 +22,3 @@ async def run_request_builder( request_builder: PDAPRequestBuilderBase ) -> Any: return await request_builder.run(self.access_manager) - - async def is_url_duplicate( - self, - url_to_check: str - ) -> bool: - """ - Check if a URL is unique. Returns duplicate info otherwise - """ - url: str = f"{self.access_manager.data_sources_url}/v2/check/unique-url" - - request_info = RequestInfo( - type_=RequestType.GET, - url=url, - params={ - "url": url_to_check - } - ) - response_info: ResponseInfo = await self.access_manager.make_request(request_info) - duplicates: list[UniqueURLDuplicateInfo] = [ - UniqueURLDuplicateInfo(**entry) for entry in response_info.data["duplicates"] - ] - is_duplicate: bool = (len(duplicates) != 0) - return is_duplicate diff --git a/src/external/pdap/impl/follows/__init__.py b/src/external/pdap/impl/follows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/follows/core.py b/src/external/pdap/impl/follows/core.py new file mode 100644 index 00000000..97281395 --- /dev/null +++ b/src/external/pdap/impl/follows/core.py @@ -0,0 +1,20 @@ +from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType, ResponseInfo + +from src.external.pdap.impl.follows.response import GetFollowsResponse, LinkUserFollow + + +async def get_user_followed_locations( + access_manager: AccessManager, +) -> GetFollowsResponse: + + 
url: str = f"{access_manager.data_sources_url}/v3/v2/source-collector/follows" + headers: dict[str, str] = await access_manager.jwt_header() + request_info = RequestInfo( + type_=RequestType.GET, + url=url, + headers=headers + ) + response_info: ResponseInfo = await access_manager.make_request(request_info) + return GetFollowsResponse( + **response_info.data + ) \ No newline at end of file diff --git a/src/external/pdap/impl/follows/response.py b/src/external/pdap/impl/follows/response.py new file mode 100644 index 00000000..a37894f5 --- /dev/null +++ b/src/external/pdap/impl/follows/response.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + + +class LinkUserFollow(BaseModel): + user_id: int + location_id: int + + +class GetFollowsResponse(BaseModel): + follows: list[LinkUserFollow] + diff --git a/src/external/pdap/impl/sync/follows/__init__.py b/src/external/pdap/impl/sync/follows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/follows/core.py b/src/external/pdap/impl/sync/follows/core.py new file mode 100644 index 00000000..8b442e56 --- /dev/null +++ b/src/external/pdap/impl/sync/follows/core.py @@ -0,0 +1,13 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.follows.response import SyncFollowGetInnerResponse, SyncFollowGetOuterResponse + + +class GetFollowsRequestBuilder(PDAPRequestBuilderBase): + + async def inner_logic(self) -> list[SyncFollowGetInnerResponse]: + url: str = self.build_url("v3/sync/follows") + response: SyncFollowGetOuterResponse = await self.get( + url=url, + return_model_type=SyncFollowGetOuterResponse + ) + return response.follows diff --git a/src/external/pdap/impl/sync/follows/response.py b/src/external/pdap/impl/sync/follows/response.py new file mode 100644 index 00000000..abdde583 --- /dev/null +++ b/src/external/pdap/impl/sync/follows/response.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + + +class 
SyncFollowGetInnerResponse(BaseModel): + user_id: int + location_id: int + +class SyncFollowGetOuterResponse(BaseModel): + follows: list[SyncFollowGetInnerResponse] diff --git a/src/security/manager.py b/src/security/manager.py index 16f0519e..8ec7996a 100644 --- a/src/security/manager.py +++ b/src/security/manager.py @@ -64,11 +64,16 @@ def check_access( oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") -def get_access_info( +def get_admin_access_info( token: Annotated[str, Depends(oauth2_scheme)] ) -> AccessInfo: return SecurityManager().check_access(token, Permissions.SOURCE_COLLECTOR) +def get_standard_user_access_info( + token: Annotated[str, Depends(oauth2_scheme)] +) -> AccessInfo: + return SecurityManager().validate_token(token) + def require_permission(permission: Permissions): def dependency(token: Annotated[str, Depends(oauth2_scheme)]) -> AccessInfo: return SecurityManager().check_access(token, permission=permission) diff --git a/tests/alembic/helpers.py b/tests/alembic/helpers.py index a284e0fc..0e19d035 100644 --- a/tests/alembic/helpers.py +++ b/tests/alembic/helpers.py @@ -1,5 +1,3 @@ -from typing import Optional - from sqlalchemy import text from sqlalchemy.orm import Session diff --git a/tests/automated/integration/api/_helpers/RequestValidator.py b/tests/automated/integration/api/_helpers/RequestValidator.py index 0db00cb3..b1bfbf20 100644 --- a/tests/automated/integration/api/_helpers/RequestValidator.py +++ b/tests/automated/integration/api/_helpers/RequestValidator.py @@ -10,7 +10,6 @@ from src.api.endpoints.batch.dtos.get.logs import GetBatchLogsResponse from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary -from src.api.shared.models.message_response import MessageResponse from src.api.endpoints.batch.duplicates.dto import GetDuplicatesByBatchResponse from src.api.endpoints.batch.urls.dto import GetURLsByBatchResponse from 
src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO @@ -32,11 +31,12 @@ from src.api.endpoints.task.dtos.get.task_status import GetTaskStatusResponseInfo from src.api.endpoints.task.dtos.get.tasks import GetTasksResponse from src.api.endpoints.url.get.dto import GetURLsResponseInfo +from src.api.shared.models.message_response import MessageResponse from src.collectors.enums import CollectorType from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.core.enums import BatchStatus from src.db.enums import TaskType -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from src.util.helper_functions import update_if_not_none @@ -268,7 +268,7 @@ def delete( def get_batch_statuses( self, collector_type: CollectorType | None = None, - status: BatchURLStatusEnum | None = None, + status: BatchURLStatusViewEnum | None = None, ) -> GetBatchSummariesResponse: params = {} update_if_not_none( diff --git a/tests/automated/integration/api/annotate/all/test_anon_count.py b/tests/automated/integration/api/annotate/all/test_anon_count.py new file mode 100644 index 00000000..05975236 --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_anon_count.py @@ -0,0 +1,125 @@ +import uuid + +import pytest + +from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse +from src.core.enums import RecordType +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from 
src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.anon_session.sqlalchemy import AnonymousSession +from src.db.models.impl.flag.url_validated.enums import URLType +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review + + +@pytest.mark.asyncio +async def test_anon_count( + api_test_helper, + test_agency_id: int, + pennsylvania: USStateCreationInfo, +): + """ + Test that the user annotation counts are updated correctly + when anonymous annotations are added. + """ + ath = api_test_helper + adb_client = ath.adb_client() + + # Set up URLs + setup_info_1 = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, include_user_annotations=True + ) + url_id: int = setup_info_1.url_mapping.url_id + + # Add anonymous sessions + anon_sessions: list[AnonymousSession] = [] + for i in range(12): + anon_session = AnonymousSession( + id=uuid.uuid4(), + ) + anon_sessions.append(anon_session) + await adb_client.add_all(anon_sessions) + + def get_anon_session_id(i: int) -> uuid.UUID: + return anon_sessions[i].id + + + # URL Types + url_type_annotations: list[AnnotationURLTypeAnon] = [] + for i in range(3): + url_type_annotation = AnnotationURLTypeAnon( + url_id=url_id, + session_id=get_anon_session_id(i), + url_type=URLType.DATA_SOURCE + ) + url_type_annotations.append(url_type_annotation) + await adb_client.add_all(url_type_annotations) + + + + # Record Types + record_type_annotations: list[AnnotationRecordTypeAnon] = [] + for i in range(5): + record_type_annotation = AnnotationRecordTypeAnon( + url_id=url_id, + session_id=get_anon_session_id(i), + record_type=RecordType.CAR_GPS + ) + record_type_annotations.append(record_type_annotation) + await 
adb_client.add_all(record_type_annotations) + + + + # Agencies + agency_annotations: list[AnnotationAgencyAnon] = [] + for i in range(7): + agency_annotation = AnnotationAgencyAnon( + url_id=url_id, + agency_id=test_agency_id, + session_id=get_anon_session_id(i) + ) + agency_annotations.append(agency_annotation) + await adb_client.add_all(agency_annotations) + + + # Locations + location_annotations: list[AnnotationLocationAnon] = [] + for i in range(9): + location_annotation = AnnotationLocationAnon( + url_id=url_id, + session_id=get_anon_session_id(i), + location_id=pennsylvania.location_id, + ) + location_annotations.append(location_annotation) + await adb_client.add_all(location_annotations) + + # Name + name_suggestion = AnnotationNameSuggestion( + url_id=url_id, + suggestion="Test Name", + source=NameSuggestionSource.USER, + ) + name_suggestion_id = await adb_client.add(name_suggestion, return_id=True) + + name_annotations: list[AnnotationNameAnonEndorsement] = [] + for i in range(11): + name_annotation = AnnotationNameAnonEndorsement( + suggestion_id=name_suggestion_id, + session_id=get_anon_session_id(i), + ) + name_annotations.append(name_annotation) + await adb_client.add_all(name_annotations) + + # Check that the counts are correct + get_response_1: GetNextURLForAllAnnotationResponse = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_1.next_annotation is not None + assert get_response_1.next_annotation.name_suggestions.suggestions[1].user_count == 5 + assert get_response_1.next_annotation.location_suggestions.suggestions[0].user_count == 4 + assert get_response_1.next_annotation.agency_suggestions.suggestions[0].user_count == 3 + assert get_response_1.next_annotation.record_type_suggestions.suggestions[0].user_count == 2 + assert get_response_1.next_annotation.url_type_suggestions[0].endorsement_count == 1 diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py 
b/tests/automated/integration/api/annotate/all/test_happy_path.py index 49d8bd97..8a62c3e8 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -8,13 +8,13 @@ from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.core.enums import RecordType +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @@ -46,7 +46,7 @@ async def test_annotate_all( url_mapping_2 = setup_info_2.url_mapping # Get a valid URL to annotate - get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() + get_response_1: 
GetNextURLForAllAnnotationResponse = await ath.request_validator.get_next_url_for_all_annotations() assert get_response_1.next_annotation is not None assert len(get_response_1.next_annotation.name_suggestions.suggestions) == 1 name_suggestion = get_response_1.next_annotation.name_suggestions.suggestions[0] @@ -106,19 +106,19 @@ async def test_annotate_all( # Check that all annotations are present in the database # Check URL Type Suggestions - all_relevance_suggestions: list[UserURLTypeSuggestion] = await adb_client.get_all(UserURLTypeSuggestion) + all_relevance_suggestions: list[AnnotationURLTypeUser] = await adb_client.get_all(AnnotationURLTypeUser) assert len(all_relevance_suggestions) == 4 suggested_types: set[URLType] = {sugg.type for sugg in all_relevance_suggestions} assert suggested_types == {URLType.DATA_SOURCE, URLType.NOT_RELEVANT} # Should be one agency - all_agency_suggestions = await adb_client.get_all(UserURLAgencySuggestion) + all_agency_suggestions = await adb_client.get_all(AnnotationAgencyUser) assert len(all_agency_suggestions) == 3 suggested_agency_ids: set[int] = {sugg.agency_id for sugg in all_agency_suggestions} assert agency_id in suggested_agency_ids # Should be one record type - all_record_type_suggestions = await adb_client.get_all(UserRecordTypeSuggestion) + all_record_type_suggestions = await adb_client.get_all(AnnotationRecordTypeUser) assert len(all_record_type_suggestions) == 3 suggested_record_types: set[RecordType] = { sugg.record_type for sugg in all_record_type_suggestions @@ -126,7 +126,7 @@ async def test_annotate_all( assert RecordType.ACCIDENT_REPORTS.value in suggested_record_types # Confirm 3 Location Suggestions, with two belonging to California and one to Pennsylvania - all_location_suggestions = await adb_client.get_all(UserLocationSuggestion) + all_location_suggestions = await adb_client.get_all(AnnotationLocationUser) assert len(all_location_suggestions) == 2 location_ids: list[int] = [location_suggestion.location_id 
for location_suggestion in all_location_suggestions] assert set(location_ids) == {california.location_id, pennsylvania.location_id} @@ -166,12 +166,12 @@ async def test_annotate_all( assert user_suggestion.user_count == 1 # Confirm 3 name suggestions - name_suggestions: list[URLNameSuggestion] = await adb_client.get_all(URLNameSuggestion) + name_suggestions: list[AnnotationNameSuggestion] = await adb_client.get_all(AnnotationNameSuggestion) assert len(name_suggestions) == 3 suggested_names: set[str] = {name_suggestion.suggestion for name_suggestion in name_suggestions} assert "New Name" in suggested_names # Confirm 2 link user name suggestions - link_user_name_suggestions: list[LinkUserNameSuggestion] = await adb_client.get_all(LinkUserNameSuggestion) + link_user_name_suggestions: list[AnnotationNameUserEndorsement] = await adb_client.get_all(AnnotationNameUserEndorsement) assert len(link_user_name_suggestions) == 2 diff --git a/tests/automated/integration/api/annotate/all/test_sorting.py b/tests/automated/integration/api/annotate/all/test_sorting.py new file mode 100644 index 00000000..2f9f7b2a --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_sorting.py @@ -0,0 +1,112 @@ +import pytest + +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch +from src.db.models.impl.url.core.enums import URLSource +from tests.automated.integration.conftest import MOCK_USER_ID +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review +from tests.helpers.setup.final_review.model import FinalReviewSetupInfo + + +@pytest.mark.asyncio +async 
def test_annotate_sorting( + api_test_helper: APITestHelper, + test_batch_id: int, + pittsburgh_locality: LocalityCreationInfo, + allegheny_county: CountyCreationInfo, +): + """ + Test that annotations are prioritized in the following order: + - URLs whose location is followed by the annotating user come first, then those followed by any user, then manual submissions + - Then prioritize by number of annotations descending + - Then prioritize by URL ID ascending (i.e. least recently created) + """ + ath = api_test_helper + dbc: AsyncDatabaseClient = ath.adb_client() + + # First URL created should be prioritized in absence of any other factors + setup_info_first_annotation: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=False + ) + get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_1.next_annotation is not None + assert get_response_1.next_annotation.url_info.url_id == setup_info_first_annotation.url_mapping.url_id + + # ...But higher annotation count should take precedence over least recently created + setup_info_high_annotations: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=True + ) + get_response_2 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_2.next_annotation is not None + assert get_response_2.next_annotation.url_info.url_id == setup_info_high_annotations.url_mapping.url_id + + # ...But manual submissions should take precedence over higher annotation count + setup_info_manual_submission: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + source=URLSource.MANUAL, + include_user_annotations=True + ) + get_response_3 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_3.next_annotation is not None + assert get_response_3.next_annotation.url_info.url_id ==
setup_info_manual_submission.url_mapping.url_id + + # URL with followed_by_any_user should take precedence over manual submissions + + ## Start by adding a new URL + setup_info_followed_by_any_user: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=False + ) + ## Add a link between that URL's batch and a location + link_batch_location = LinkLocationBatch( + batch_id=setup_info_followed_by_any_user.batch_id, + location_id=pittsburgh_locality.location_id + ) + await dbc.add(link_batch_location) + ## Add a link between that location and a user + link_location_user_follow = LinkLocationUserFollow( + location_id=pittsburgh_locality.location_id, + user_id=MOCK_USER_ID + 1 # To ensure it's not the same user we'll be using later on. + ) + await dbc.add(link_location_user_follow) + + # Run get_next_url_for_all_annotations + get_response_4 = await ath.request_validator.get_next_url_for_all_annotations() + # Assert that the URL with followed_by_any_user is returned + assert get_response_4.next_annotation is not None + assert get_response_4.next_annotation.url_info.url_id == setup_info_followed_by_any_user.url_mapping.url_id + + # URL whose associated location is followed by this specific user + # should take precedence over URL whose associated location + # is followed by any user + + ## Start by adding a new URL + setup_info_followed_by_annotating_user: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=False + ) + + ## Add a link between that URL's batch and a location + link_batch_location = LinkLocationBatch( + batch_id=setup_info_followed_by_annotating_user.batch_id, + location_id=allegheny_county.location_id + ) + await dbc.add(link_batch_location) + ## Add a link between that location and the mock user + link_location_user_follow = LinkLocationUserFollow( + location_id=allegheny_county.location_id, + 
user_id=MOCK_USER_ID + ) + await dbc.add(link_location_user_follow) + + get_response_5 = await ath.request_validator.get_next_url_for_all_annotations() + # Assert that the URL followed by the annotating user is returned + assert get_response_5.next_annotation is not None + assert get_response_5.next_annotation.url_info.url_id == setup_info_followed_by_annotating_user.url_mapping.url_id + diff --git a/tests/automated/integration/api/annotate/anonymous/test_core.py b/tests/automated/integration/api/annotate/anonymous/test_core.py index 26516b16..65f18965 100644 --- a/tests/automated/integration/api/annotate/anonymous/test_core.py +++ b/tests/automated/integration/api/annotate/anonymous/test_core.py @@ -3,22 +3,30 @@ import pytest from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion -from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.all.post.models.agency import AnnotationPostAgencyInfo from src.api.endpoints.annotate.all.post.models.location import AnnotationPostLocationInfo from src.api.endpoints.annotate.all.post.models.name import AnnotationPostNameInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.api.endpoints.annotate.anonymous.get.response import GetNextURLForAnonymousAnnotationResponse +from src.api.shared.models.message_response import MessageResponse from src.core.enums import RecordType from src.db.dtos.url.mapping_.simple import SimpleURLMapping +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from
src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType from src.db.models.mixins import URLDependentMixin from tests.automated.integration.api.annotate.anonymous.helper import get_next_url_for_anonymous_annotation, \ post_and_get_next_url_for_anonymous_annotation +from tests.automated.integration.conftest import MOCK_USER_ID from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review from tests.helpers.setup.final_review.model import FinalReviewSetupInfo @@ -80,16 +88,26 @@ async def test_annotate_anonymous( assert post_response_1.next_annotation.url_info.url_id != get_response_1.next_annotation.url_info.url_id for model in [ - AnonymousAnnotationAgency, - AnonymousAnnotationLocation, - AnonymousAnnotationRecordType, - AnonymousAnnotationURLType + AnnotationAgencyAnon, + AnnotationLocationAnon, + AnnotationRecordTypeAnon, + AnnotationURLTypeAnon ]: instances: list[URLDependentMixin] = 
await ddc.adb_client.get_all(model) assert len(instances) == 1 instance: model = instances[0] assert instance.url_id == get_response_1.next_annotation.url_info.url_id + # Check for existence of name suggestion (2 were added by setup) + name_suggestions: list[AnnotationNameSuggestion] = await ddc.adb_client.get_all(AnnotationNameSuggestion) + assert len(name_suggestions) == 3 + + # Check for existence of link + link_instances: list[AnnotationNameAnonEndorsement] = await ddc.adb_client.get_all(AnnotationNameAnonEndorsement) + assert len(link_instances) == 1 + link_instance: AnnotationNameAnonEndorsement = link_instances[0] + assert link_instance.session_id == session_id + # Run again without giving session ID, confirm original URL returned get_response_2: GetNextURLForAnonymousAnnotationResponse = await get_next_url_for_anonymous_annotation(rv) assert get_response_2.session_id != session_id @@ -102,3 +120,52 @@ async def test_annotate_anonymous( assert get_response_3.next_annotation is not None assert get_response_3.next_annotation.url_info.url_id == post_response_1.next_annotation.url_info.url_id + ### TEST MIGRATION ### + # Call the migration endpoint with the anonymous session ID, and confirm all anonymous annotations have transferred to the user. + response: MessageResponse = rv.post_v3( + f'/annotate/migrate?session_id={session_id}', + expected_model=MessageResponse, + ) + assert response.message == 'Annotations migrated successfully.'
+ + # Check all annotations + + # URL Types + url_types: list[AnnotationURLTypeUser] = await ddc.adb_client.get_all(AnnotationURLTypeUser) + assert len(url_types) == 3 + annotation_url_type: AnnotationURLTypeUser = url_types[-1] + assert annotation_url_type.user_id == MOCK_USER_ID + assert annotation_url_type.url_id == get_response_1.next_annotation.url_info.url_id + assert annotation_url_type.type == URLType.DATA_SOURCE + + # Locations + locations: list[AnnotationLocationUser] = await ddc.adb_client.get_all(AnnotationLocationUser) + assert len(locations) == 1 + annotation_location: AnnotationLocationUser = locations[0] + assert annotation_location.user_id == MOCK_USER_ID + assert annotation_location.url_id == get_response_1.next_annotation.url_info.url_id + assert annotation_location.location_id == pennsylvania.location_id + + # Agencies + agencies: list[AnnotationAgencyUser] = await ddc.adb_client.get_all(AnnotationAgencyUser) + assert len(agencies) == 3 + annotation_agency: AnnotationAgencyUser = agencies[-1] + assert annotation_agency.user_id == MOCK_USER_ID + assert annotation_agency.url_id == get_response_1.next_annotation.url_info.url_id + assert annotation_agency.agency_id == agency_id + + # Record Types + record_types: list[AnnotationRecordTypeUser] = await ddc.adb_client.get_all(AnnotationRecordTypeUser) + assert len(record_types) == 3 + annotation_record_type: AnnotationRecordTypeUser = record_types[-1] + assert annotation_record_type.user_id == MOCK_USER_ID + assert annotation_record_type.url_id == get_response_1.next_annotation.url_info.url_id + assert annotation_record_type.record_type == RecordType.ACCIDENT_REPORTS.value + + # Name Suggestions + name_suggestions: list[AnnotationNameUserEndorsement] = await ddc.adb_client.get_all(AnnotationNameUserEndorsement) + assert len(name_suggestions) == 1 + annotation_name: AnnotationNameUserEndorsement = name_suggestions[0] + assert annotation_name.user_id == MOCK_USER_ID + + diff --git 
a/tests/automated/integration/api/batch/summaries/test_happy_path.py b/tests/automated/integration/api/batch/summaries/test_happy_path.py index 6af9ce2b..126f1118 100644 --- a/tests/automated/integration/api/batch/summaries/test_happy_path.py +++ b/tests/automated/integration/api/batch/summaries/test_happy_path.py @@ -68,7 +68,6 @@ async def test_get_batch_summaries(api_test_helper): assert counts_1.pending == 1 assert counts_1.submitted == 2 assert counts_1.not_relevant == 0 - assert counts_1.duplicate == 0 assert counts_1.errored == 0 result_2 = results[1] @@ -79,7 +78,6 @@ async def test_get_batch_summaries(api_test_helper): assert counts_2.errored == 0 assert counts_2.pending == 0 assert counts_2.submitted == 0 - assert counts_2.duplicate == 0 result_3 = results[2] assert result_3.id == batch_3_id @@ -89,4 +87,3 @@ async def test_get_batch_summaries(api_test_helper): assert counts_3.errored == 0 assert counts_3.pending == 7 assert counts_3.submitted == 1 - assert counts_3.duplicate == 7 diff --git a/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py b/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py index f4181629..7ebc4ccf 100644 --- a/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py +++ b/tests/automated/integration/api/batch/summaries/test_pending_url_filter.py @@ -3,7 +3,7 @@ from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.dtos.url.mapping_.simple import SimpleURLMapping -from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum +from src.db.models.materialized_views.batch_url_status.enums import BatchURLStatusViewEnum from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.core import DBDataCreator @@ -52,7 +52,7 @@ async def test_get_batch_summaries_pending_url_filter(api_test_helper): # Test filter for pending URLs and only retrieve the second batch 
pending_urls_results = ath.request_validator.get_batch_statuses( - status=BatchURLStatusEnum.HAS_UNLABELED_URLS + status=BatchURLStatusViewEnum.HAS_UNLABELED_URLS ) assert len(pending_urls_results.results) == 1 diff --git a/tests/automated/integration/api/metrics/batches/test_aggregated.py b/tests/automated/integration/api/metrics/batches/test_aggregated.py index 3d84d6d7..00936d15 100644 --- a/tests/automated/integration/api/metrics/batches/test_aggregated.py +++ b/tests/automated/integration/api/metrics/batches/test_aggregated.py @@ -1,6 +1,6 @@ import pytest -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping_.simple import SimpleURLMapping @@ -25,12 +25,10 @@ async def test_get_batches_aggregated_metrics( ) url_mappings_broken: list[SimpleURLMapping] = await create_urls( adb_client=adb_client, - status=URLStatus.BROKEN, count=4, ) url_mappings_ok: list[SimpleURLMapping] = await create_urls( adb_client=adb_client, - status=URLStatus.OK, count=11, ) url_mappings_all: list[SimpleURLMapping] = url_mappings_broken + url_mappings_ok diff --git a/tests/automated/integration/api/metrics/batches/test_breakdown.py b/tests/automated/integration/api/metrics/batches/test_breakdown.py index 6921c3c1..71b7c96b 100644 --- a/tests/automated/integration/api/metrics/batches/test_breakdown.py +++ b/tests/automated/integration/api/metrics/batches/test_breakdown.py @@ -2,7 +2,7 @@ import pytest -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping_.simple import SimpleURLMapping diff --git a/tests/automated/integration/api/metrics/test_backlog.py b/tests/automated/integration/api/metrics/test_backlog.py index 181c295e..a6de442e 100644 --- 
a/tests/automated/integration/api/metrics/test_backlog.py +++ b/tests/automated/integration/api/metrics/test_backlog.py @@ -1,7 +1,6 @@ import pendulum import pytest -from src.collectors.enums import URLStatus from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py index e203b722..5dc163c7 100644 --- a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py +++ b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py @@ -2,7 +2,7 @@ import pytest -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters @@ -46,7 +46,7 @@ async def test_get_urls_aggregated_metrics(api_test_helper): batch_2: int = await ddc.create_batch( strategy=CollectorType.AUTO_GOOGLER, ) - url_mappings_2_ok: list[SimpleURLMapping] = await ddc.create_urls(batch_id=batch_2, count=4, status=URLStatus.OK) + url_mappings_2_ok: list[SimpleURLMapping] = await ddc.create_urls(batch_id=batch_2, count=4) url_mappings_2_validated: list[SimpleURLMapping] = await ddc.create_validated_urls(count=1, validation_type=URLType.DATA_SOURCE) url_mappings_2_not_relevant: list[SimpleURLMapping] = await ddc.create_validated_urls(count=5, validation_type=URLType.NOT_RELEVANT) url_ids_2_validated: list[int] = [url_mapping.url_id for url_mapping in url_mappings_2_validated] diff --git a/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py b/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py index d0a25ab1..a9a52d2e 100644 --- 
a/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py +++ b/tests/automated/integration/api/metrics/urls/breakdown/test_submitted.py @@ -1,7 +1,7 @@ import pendulum import pytest -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters diff --git a/tests/automated/integration/api/proposals/__init__.py b/tests/automated/integration/api/proposals/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/proposals/test_agencies.py b/tests/automated/integration/api/proposals/test_agencies.py new file mode 100644 index 00000000..354481f1 --- /dev/null +++ b/tests/automated/integration/api/proposals/test_agencies.py @@ -0,0 +1,250 @@ +import pytest + +from src.api.endpoints.proposals.agencies.by_id.approve.response import ProposalAgencyApproveResponse +from src.api.endpoints.proposals.agencies.by_id.locations.get.response import ProposalAgencyGetLocationsOuterResponse +from src.api.endpoints.proposals.agencies.by_id.put.request import ProposalAgencyPutRequest +from src.api.endpoints.proposals.agencies.by_id.reject.request import ProposalAgencyRejectRequestModel +from src.api.endpoints.proposals.agencies.by_id.reject.response import ProposalAgencyRejectResponse +from src.api.endpoints.proposals.agencies.root.get.response import ProposalAgencyGetOuterResponse +from src.api.endpoints.submit.agency.enums import AgencyProposalRequestStatus +from src.api.endpoints.submit.agency.request import SubmitAgencyRequestModel +from src.api.endpoints.submit.agency.response import SubmitAgencyProposalResponse +from src.api.shared.models.message_response import MessageResponse +from src.db.client.async_ import 
AsyncDatabaseClient +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from tests.automated.integration.api._helpers.RequestValidator import RequestValidator +from tests.automated.integration.conftest import MOCK_USER_ID +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo + + +@pytest.mark.asyncio +async def test_agencies( + api_test_helper: APITestHelper, + pittsburgh_locality: LocalityCreationInfo, + allegheny_county: CountyCreationInfo, + pennsylvania: USStateCreationInfo +): + request = SubmitAgencyRequestModel( + name="test_agency", + agency_type=AgencyType.LAW_ENFORCEMENT, + jurisdiction_type=JurisdictionType.LOCAL, + location_ids=[ + allegheny_county.location_id, + pittsburgh_locality.location_id + ] + ) + + rv: RequestValidator = api_test_helper.request_validator + adb_client: AsyncDatabaseClient = api_test_helper.adb_client() + # Add pending agency + submit_response_success: SubmitAgencyProposalResponse = rv.post_v3( + "/submit/agency", + expected_model=SubmitAgencyProposalResponse, + json=request.model_dump(mode="json") + ) + assert submit_response_success.status == AgencyProposalRequestStatus.SUCCESS + proposal_id: int = submit_response_success.proposal_id + + # Try to submit duplicate agency and confirm it fails + submit_response_proposal_duplicate: SubmitAgencyProposalResponse = rv.post_v3( + "/submit/agency", + expected_model=SubmitAgencyProposalResponse, + json=request.model_dump(mode="json") + ) + assert submit_response_proposal_duplicate.status == AgencyProposalRequestStatus.PROPOSAL_DUPLICATE + assert 
submit_response_proposal_duplicate.proposal_id is None + assert submit_response_proposal_duplicate.details == "An agency with the same properties is already in the proposal queue." + + # Call GET endpoint + get_response_1: ProposalAgencyGetOuterResponse = rv.get_v3( + "/proposal/agencies", + expected_model=ProposalAgencyGetOuterResponse + ) + # Confirm agency is in response + assert len(get_response_1.results) == 1 + proposal = get_response_1.results[0] + assert proposal.id == proposal_id + assert proposal.name == request.name + assert proposal.proposing_user_id == MOCK_USER_ID + assert proposal.agency_type == request.agency_type + assert proposal.jurisdiction_type == request.jurisdiction_type + assert [loc.location_id for loc in proposal.locations] == request.location_ids + assert proposal.created_at is not None + + # Edit Endpoint + edit_response: MessageResponse = rv.put_v3( + f"/proposal/agencies/{proposal_id}", + expected_model=MessageResponse, + json=ProposalAgencyPutRequest( + name='Modified Agency', + type=AgencyType.AGGREGATED, + jurisdiction_type=JurisdictionType.COUNTY, + ).model_dump(mode="json") + ) + assert edit_response.message == "Proposed agency updated." 
+ + # Confirm agency proposal is updated + get_response_1p5: ProposalAgencyGetOuterResponse = rv.get_v3( + "/proposal/agencies", + expected_model=ProposalAgencyGetOuterResponse + ) + # Confirm agency is in response + assert len(get_response_1p5.results) == 1 + proposal = get_response_1p5.results[0] + assert proposal.id == proposal_id + assert proposal.name == 'Modified Agency' + assert proposal.proposing_user_id == MOCK_USER_ID + assert proposal.agency_type == AgencyType.AGGREGATED + assert proposal.jurisdiction_type == JurisdictionType.COUNTY + assert [loc.location_id for loc in proposal.locations] == request.location_ids + assert proposal.created_at is not None + + + # Get locations for endpoint + get_locations_response: ProposalAgencyGetLocationsOuterResponse = rv.get_v3( + f"/proposal/agencies/{proposal_id}/locations", + expected_model=ProposalAgencyGetLocationsOuterResponse + ) + assert len(get_locations_response.results) == 2 + # Check Location IDs match + assert {loc.location_id for loc in get_locations_response.results} == { + allegheny_county.location_id, + pittsburgh_locality.location_id + } + + # Add location to endpoint + add_locations_response: MessageResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id}/locations/{pennsylvania.location_id}" + ) + # Check that location is added + get_locations_response: ProposalAgencyGetLocationsOuterResponse = rv.get_v3( + f"/proposal/agencies/{proposal_id}/locations", + expected_model=ProposalAgencyGetLocationsOuterResponse + ) + assert len(get_locations_response.results) == 3 + assert {loc.location_id for loc in get_locations_response.results} == { + allegheny_county.location_id, + pittsburgh_locality.location_id, + pennsylvania.location_id + } + + # Remove Location from endpoint + remove_location_response: MessageResponse = rv.delete_v3( + f"/proposal/agencies/{proposal_id}/locations/{pennsylvania.location_id}" + ) + # Check that location is removed + get_locations_response: 
ProposalAgencyGetLocationsOuterResponse = rv.get_v3( + f"/proposal/agencies/{proposal_id}/locations", + expected_model=ProposalAgencyGetLocationsOuterResponse + ) + assert len(get_locations_response.results) == 2 + assert {loc.location_id for loc in get_locations_response.results} == { + allegheny_county.location_id, + pittsburgh_locality.location_id, + } + + # Call APPROVE endpoint + approve_response: ProposalAgencyApproveResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id}/approve", + expected_model=ProposalAgencyApproveResponse + ) + assert approve_response.message == "Proposed agency approved." + assert approve_response.success + assert approve_response.agency_id is not None + agency_id: int = approve_response.agency_id + + # Check agency is added + agencies: list[Agency] = await adb_client.get_all(Agency) + assert len(agencies) == 1 + agency = agencies[0] + assert agency.id == agency_id + assert agency.name == "Modified Agency" + assert agency.agency_type == AgencyType.AGGREGATED + assert agency.jurisdiction_type == JurisdictionType.COUNTY + + links: list[LinkAgencyLocation] = await adb_client.get_all(LinkAgencyLocation) + assert len(links) == 2 + assert {link.agency_id for link in links} == {agency.id} + assert {link.location_id for link in links} == set(request.location_ids) + + # Confirm agency is no longer in proposal queue + get_response_2: ProposalAgencyGetOuterResponse = rv.get_v3( + "/proposal/agencies", + expected_model=ProposalAgencyGetOuterResponse + ) + # Confirm response contains no proposals + assert len(get_response_2.results) == 0 + + # Try to submit agency again and confirm it fails + submit_response_accepted_duplicate: SubmitAgencyProposalResponse = rv.post_v3( + "/submit/agency", + expected_model=SubmitAgencyProposalResponse, + json=SubmitAgencyRequestModel( + name='Modified Agency', + agency_type=AgencyType.AGGREGATED, + jurisdiction_type=JurisdictionType.COUNTY, + location_ids=[ + allegheny_county.location_id, +
pittsburgh_locality.location_id + ] + ).model_dump(mode="json") + ) + assert submit_response_accepted_duplicate.status == AgencyProposalRequestStatus.ACCEPTED_DUPLICATE + assert submit_response_accepted_duplicate.proposal_id is None + assert submit_response_accepted_duplicate.details == "An agency with the same properties is already approved." + + # Submit Separate Agency and Reject It + request_for_rejection = SubmitAgencyRequestModel( + name="Rejectable Agency", + agency_type=AgencyType.LAW_ENFORCEMENT, + jurisdiction_type=JurisdictionType.FEDERAL, + location_ids=[] + ) + submit_response_for_rejection: SubmitAgencyProposalResponse = rv.post_v3( + "/submit/agency", + expected_model=SubmitAgencyProposalResponse, + json=request_for_rejection.model_dump(mode="json") + ) + assert submit_response_for_rejection.status == AgencyProposalRequestStatus.SUCCESS + proposal_id_for_rejection: int = submit_response_for_rejection.proposal_id + + # Call REJECT endpoint + reject_response: ProposalAgencyRejectResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id_for_rejection}/reject", + expected_model=ProposalAgencyRejectResponse, + json=ProposalAgencyRejectRequestModel( + rejection_reason="Test rejection reason" + ).model_dump(mode="json") + ) + assert reject_response.success + assert reject_response.message == "Proposed agency rejected." + + # Confirm does not appear in proposal queue OR final agency list + agencies = await adb_client.get_all(Agency) + assert len(agencies) == 1 + assert agencies[0].id == agency.id + + # Confirm cannot reject endpoint already approved + failed_reject_response: ProposalAgencyRejectResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id}/reject", + expected_model=ProposalAgencyRejectResponse, + json=ProposalAgencyRejectRequestModel( + rejection_reason="Test rejection reason" + ).model_dump(mode="json") + ) + assert not failed_reject_response.success + assert failed_reject_response.message == "Proposed agency is not pending." 
+ + # Confirm cannot approve endpoint already rejected + failed_approve_response: ProposalAgencyApproveResponse = rv.post_v3( + f"/proposal/agencies/{proposal_id_for_rejection}/approve", + expected_model=ProposalAgencyApproveResponse + ) + assert not failed_approve_response.success + assert failed_approve_response.message == "Proposed agency is not pending." + diff --git a/tests/automated/integration/api/submit/data_source/test_core.py b/tests/automated/integration/api/submit/data_source/test_core.py index 558327c3..bf339bfd 100644 --- a/tests/automated/integration/api/submit/data_source/test_core.py +++ b/tests/automated/integration/api/submit/data_source/test_core.py @@ -4,9 +4,13 @@ import pytest from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest -from src.collectors.enums import URLStatus from src.core.enums import RecordType, BatchStatus from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL @@ -15,11 +19,6 @@ from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, UpdateMethodEnum, \ RetentionScheduleEnum, AccessTypeEnum from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from 
src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -78,7 +77,6 @@ async def test_submit_data_source( assert url.scheme == "https" assert url.trailing_slash == True assert url.source == URLSource.MANUAL - assert url.status == URLStatus.OK assert url.description == "Example description" # Check for Batch @@ -96,31 +94,31 @@ async def test_submit_data_source( assert batch_url_link.url_id == url.id # Check for anonymous annotations - url_type_suggestion: AnonymousAnnotationURLType = await adb_client.one_or_none_model(AnonymousAnnotationURLType) + url_type_suggestion: AnnotationURLTypeAnon = await adb_client.one_or_none_model(AnnotationURLTypeAnon) assert url_type_suggestion is not None assert url_type_suggestion.url_id == url.id assert url_type_suggestion.url_type == URLType.DATA_SOURCE session_id: UUID = url_type_suggestion.session_id # Check for Location Suggestion - location_suggestion: AnonymousAnnotationLocation = await adb_client.one_or_none_model(AnonymousAnnotationLocation) + location_suggestion: AnnotationLocationAnon = await adb_client.one_or_none_model(AnnotationLocationAnon) assert location_suggestion is not None assert location_suggestion.location_id == pittsburgh_locality.location_id assert location_suggestion.session_id == session_id # Check for Agency Suggestion - agency_suggestion: AnonymousAnnotationAgency = await adb_client.one_or_none_model(AnonymousAnnotationAgency) + agency_suggestion: AnnotationAgencyAnon = await adb_client.one_or_none_model(AnnotationAgencyAnon) 
assert agency_suggestion is not None assert agency_suggestion.agency_id == test_agency_id assert agency_suggestion.session_id == session_id # Check for Name Suggestion - name_suggestion: URLNameSuggestion = await adb_client.one_or_none_model(URLNameSuggestion) + name_suggestion: AnnotationNameSuggestion = await adb_client.one_or_none_model(AnnotationNameSuggestion) assert name_suggestion is not None assert name_suggestion.suggestion == "Example name" # Check for Record Type Suggestion - record_type_suggestion: AnonymousAnnotationRecordType = await adb_client.one_or_none_model(AnonymousAnnotationRecordType) + record_type_suggestion: AnnotationRecordTypeAnon = await adb_client.one_or_none_model(AnnotationRecordTypeAnon) assert record_type_suggestion.record_type == RecordType.COMPLAINTS_AND_MISCONDUCT assert record_type_suggestion.session_id == session_id diff --git a/tests/automated/integration/api/submit/data_source/test_duplicate.py b/tests/automated/integration/api/submit/data_source/test_duplicate.py index ea16e1ec..87dd21a7 100644 --- a/tests/automated/integration/api/submit/data_source/test_duplicate.py +++ b/tests/automated/integration/api/submit/data_source/test_duplicate.py @@ -1,12 +1,13 @@ import pytest from fastapi import HTTPException -from src.api.endpoints.submit.data_source.models.response.duplicate import SubmitDataSourceURLDuplicateSubmissionResponse +from src.api.endpoints.submit.data_source.models.response.duplicate import \ + SubmitDataSourceURLDuplicateSubmissionResponse from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum from tests.helpers.api_test_helper import APITestHelper from 
tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -18,6 +19,7 @@ async def test_submit_data_source_duplicate( pittsburgh_locality: LocalityCreationInfo, test_url_data_source_mapping: SimpleURLMapping ): + await api_test_helper.adb_client().refresh_materialized_views() ath = api_test_helper try: @@ -34,5 +36,5 @@ async def test_submit_data_source_duplicate( model = SubmitDataSourceURLDuplicateSubmissionResponse(**response) assert model.url_id == test_url_data_source_mapping.url_id assert model.url_type == URLType.DATA_SOURCE - assert model.url_status == URLStatus.OK + assert model.url_status == URLStatusViewEnum.AWAITING_SUBMISSION assert model.message == "Duplicate URL found" diff --git a/tests/automated/integration/api/submit/test_url_maximal.py b/tests/automated/integration/api/submit/test_url_maximal.py index e57770fb..5e9f0ec4 100644 --- a/tests/automated/integration/api/submit/test_url_maximal.py +++ b/tests/automated/integration/api/submit/test_url_maximal.py @@ -5,14 +5,14 @@ from src.api.endpoints.submit.url.models.response import URLSubmissionResponse from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.sqlalchemy 
import URL -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -56,32 +56,32 @@ async def test_maximal( link: LinkUserSubmittedURL = links[0] assert link.url_id == url_id - agen_suggs: list[UserURLAgencySuggestion] = await adb_client.get_all(UserURLAgencySuggestion) + agen_suggs: list[AnnotationAgencyUser] = await adb_client.get_all(AnnotationAgencyUser) assert len(agen_suggs) == 1 - agen_sugg: UserURLAgencySuggestion = agen_suggs[0] + agen_sugg: AnnotationAgencyUser = agen_suggs[0] assert agen_sugg.url_id == url_id assert agen_sugg.agency_id == agency_id - loc_suggs: list[UserLocationSuggestion] = await adb_client.get_all(UserLocationSuggestion) + loc_suggs: list[AnnotationLocationUser] = await adb_client.get_all(AnnotationLocationUser) assert len(loc_suggs) == 1 - loc_sugg: UserLocationSuggestion = loc_suggs[0] + loc_sugg: AnnotationLocationUser = loc_suggs[0] assert loc_sugg.url_id == url_id assert loc_sugg.location_id == pittsburgh_locality.location_id - name_sugg: list[URLNameSuggestion] = await adb_client.get_all(URLNameSuggestion) + name_sugg: list[AnnotationNameSuggestion] = await adb_client.get_all(AnnotationNameSuggestion) assert len(name_sugg) == 1 - name_sugg: URLNameSuggestion = name_sugg[0] + name_sugg: AnnotationNameSuggestion = name_sugg[0] assert name_sugg.url_id == url_id assert name_sugg.suggestion == "Example URL" assert name_sugg.source == NameSuggestionSource.USER - 
name_link_suggs: list[LinkUserNameSuggestion] = await adb_client.get_all(LinkUserNameSuggestion) + name_link_suggs: list[AnnotationNameUserEndorsement] = await adb_client.get_all(AnnotationNameUserEndorsement) assert len(name_link_suggs) == 1 - name_link_sugg: LinkUserNameSuggestion = name_link_suggs[0] + name_link_sugg: AnnotationNameUserEndorsement = name_link_suggs[0] assert name_link_sugg.suggestion_id == name_sugg.id - rec_suggs: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) + rec_suggs: list[AnnotationRecordTypeUser] = await adb_client.get_all(AnnotationRecordTypeUser) assert len(rec_suggs) == 1 - rec_sugg: UserRecordTypeSuggestion = rec_suggs[0] + rec_sugg: AnnotationRecordTypeUser = rec_suggs[0] assert rec_sugg.url_id == url_id assert rec_sugg.record_type == RecordType.INCARCERATION_RECORDS.value diff --git a/tests/automated/integration/api/test_manual_batch.py b/tests/automated/integration/api/test_manual_batch.py index fa3f7884..ad8bfe3f 100644 --- a/tests/automated/integration/api/test_manual_batch.py +++ b/tests/automated/integration/api/test_manual_batch.py @@ -2,12 +2,12 @@ import pytest from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInnerInputDTO, ManualBatchInputDTO -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.batch.sqlalchemy import Batch from src.collectors.enums import CollectorType from src.core.enums import RecordType +from src.db.models.impl.batch.sqlalchemy import Batch +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata @pytest.mark.asyncio diff --git 
a/tests/automated/integration/api/url/by_id/delete/test_any_url.py b/tests/automated/integration/api/url/by_id/delete/test_any_url.py index 50b3ca0c..d61f1553 100644 --- a/tests/automated/integration/api/url/by_id/delete/test_any_url.py +++ b/tests/automated/integration/api/url/by_id/delete/test_any_url.py @@ -7,6 +7,25 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.enums import ChangeLogOperationType +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType +from src.db.models.impl.annotation.record_type.user.user 
import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.change_log import ChangeLog from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL @@ -15,7 +34,6 @@ from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL @@ -26,24 +44,6 @@ from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.internet_archives.save.sqlalchemy import URLInternetArchivesSaveMetadata from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot -from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode, AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from 
src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder @@ -128,34 +128,34 @@ async def _check_results( # ANNOTATIONS ## AUTO ### Agency - URLAutoAgencyIDSubtask, - AgencyIDSubtaskSuggestion, + AnnotationAgencyAutoSubtask, + AnnotationAgencyAutoSuggestion, ### Record Type - AutoRecordTypeSuggestion, + AnnotationAutoRecordType, ### URL Type - AutoRelevantSuggestion, + AnnotationAutoURLType, ### Location - AutoLocationIDSubtask, - LocationIDSubtaskSuggestion, + AnnotationLocationAutoSubtask, + AnnotationLocationAutoSuggestion, ## USER ### Agency - UserURLAgencySuggestion, + 
AnnotationAgencyUser, ### Record Type - UserRecordTypeSuggestion, + AnnotationRecordTypeUser, ### URL Type - UserURLTypeSuggestion, + AnnotationURLTypeUser, ### Location - UserLocationSuggestion, - URLNameSuggestion, + AnnotationLocationUser, + AnnotationNameSuggestion, ## ANONYMOUS ### Agency - AnonymousAnnotationAgency, + AnnotationAgencyAnon, ### Location - AnonymousAnnotationLocation, + AnnotationLocationAnon, ### Record Type - AnonymousAnnotationRecordType, + AnnotationRecordTypeAnon, ### URL Type - AnonymousAnnotationURLType, + AnnotationURLTypeAnon, ] for model in models: assert await dbc.get_all(model) == [] @@ -316,7 +316,7 @@ async def _setup( ### Agency #### Subtask agency_subtask_id: int = await dbc.add( - URLAutoAgencyIDSubtask( + AnnotationAgencyAutoSubtask( url_id=url.url_id, task_id=task_id, agencies_found=True, @@ -327,7 +327,7 @@ async def _setup( ) ### Suggestion await dbc.add( - AgencyIDSubtaskSuggestion( + AnnotationAgencyAutoSuggestion( subtask_id=agency_subtask_id, agency_id=agency_id, confidence=60 @@ -335,14 +335,14 @@ async def _setup( ) ### Record Type await dbc.add( - AutoRecordTypeSuggestion( + AnnotationAutoRecordType( url_id=url.url_id, record_type=RecordType.BOOKING_REPORTS.value ) ) ### Relevant await dbc.add( - AutoRelevantSuggestion( + AnnotationAutoURLType( url_id=url.url_id, relevant=True, confidence=0.5, @@ -352,7 +352,7 @@ async def _setup( ### Location #### Subtask location_subtask_id: int = await dbc.add( - AutoLocationIDSubtask( + AnnotationLocationAutoSubtask( url_id=url.url_id, task_id=task_id, locations_found=True, @@ -362,7 +362,7 @@ async def _setup( ) #### Suggestion await dbc.add( - LocationIDSubtaskSuggestion( + AnnotationLocationAutoSuggestion( subtask_id=location_subtask_id, location_id=pittsburgh_id, confidence=50 @@ -371,7 +371,7 @@ async def _setup( ## USER ### Agency await dbc.add( - UserURLAgencySuggestion( + AnnotationAgencyUser( url_id=url.url_id, user_id=1, agency_id=agency_id, @@ -380,7 +380,7 @@ async 
def _setup( ) ### Record Type await dbc.add( - UserRecordTypeSuggestion( + AnnotationRecordTypeUser( url_id=url.url_id, user_id=1, record_type=RecordType.BOOKING_REPORTS.value, @@ -388,7 +388,7 @@ async def _setup( ) ### URL Type await dbc.add( - UserURLTypeSuggestion( + AnnotationURLTypeUser( url_id=url.url_id, type=URLType.INDIVIDUAL_RECORD, user_id=1 @@ -396,7 +396,7 @@ async def _setup( ) ### Location await dbc.add( - UserLocationSuggestion( + AnnotationLocationUser( url_id=url.url_id, location_id=pittsburgh_id, user_id=1, @@ -404,7 +404,7 @@ async def _setup( ) ### Name name_suggestion_id: int = await dbc.add( - URLNameSuggestion( + AnnotationNameSuggestion( url_id=url.url_id, suggestion="Test Name", source=NameSuggestionSource.USER, @@ -412,7 +412,7 @@ async def _setup( return_id=True ) await dbc.add( - LinkUserNameSuggestion( + AnnotationNameUserEndorsement( suggestion_id=name_suggestion_id, user_id=1, ) @@ -423,25 +423,25 @@ async def _setup( ## ANONYMOUS for model in [ ### Agency - AnonymousAnnotationAgency( + AnnotationAgencyAnon( url_id=url.url_id, agency_id=agency_id, session_id=session_id, ), ### Record Type - AnonymousAnnotationRecordType( + AnnotationRecordTypeAnon( url_id=url.url_id, record_type=RecordType.BOOKING_REPORTS.value, session_id=session_id, ), ### URL Type - AnonymousAnnotationURLType( + AnnotationURLTypeAnon( url_id=url.url_id, url_type=URLType.INDIVIDUAL_RECORD, session_id=session_id, ), ### Location - AnonymousAnnotationLocation( + AnnotationLocationAnon( url_id=url.url_id, location_id=pittsburgh_id, session_id=session_id diff --git a/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py b/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py index cce84649..155b56d7 100644 --- a/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py +++ b/tests/automated/integration/api/url/by_id/snapshot/test_not_found.py @@ -1,7 +1,8 @@ import pytest +from fastapi import Response from 
tests.helpers.api_test_helper import APITestHelper -from fastapi import Response + @pytest.mark.asyncio async def test_get_url_screenshot_not_found(api_test_helper: APITestHelper): diff --git a/tests/automated/integration/api/url/test_get.py b/tests/automated/integration/api/url/test_get.py index 8c95c670..d1607f7c 100644 --- a/tests/automated/integration/api/url/test_get.py +++ b/tests/automated/integration/api/url/test_get.py @@ -28,6 +28,7 @@ async def test_get_urls(api_test_helper: APITestHelper): # Add errors await db_data_creator.task_errors(url_ids=url_ids) + await api_test_helper.adb_client().refresh_materialized_views() data: GetURLsResponseInfo = api_test_helper.request_validator.get_urls() assert data.count == 3 diff --git a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index 19a9fe19..c15ba98c 100644 --- a/tests/automated/integration/conftest.py +++ b/tests/automated/integration/conftest.py @@ -6,7 +6,6 @@ from starlette.testclient import TestClient from src.api.main import app -from src.collectors.enums import URLStatus from src.collectors.manager import AsyncCollectorManager from src.core.core import AsyncCore from src.core.enums import RecordType @@ -19,7 +18,7 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.security.dtos.access_info import AccessInfo from src.security.enums import Permissions -from src.security.manager import get_access_info +from src.security.manager import get_admin_access_info, get_standard_user_access_info from tests.automated.integration.api._helpers.RequestValidator import RequestValidator from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.core import DBDataCreator @@ -134,7 +133,8 @@ def override_access_info() -> AccessInfo: @pytest.fixture(scope="session") def client(disable_task_flags) -> Generator[TestClient, None, None]: with TestClient(app) as c: - app.dependency_overrides[get_access_info] = override_access_info + 
app.dependency_overrides[get_admin_access_info] = override_access_info + app.dependency_overrides[get_standard_user_access_info] = override_access_info async_core: AsyncCore = c.app.state.async_core # Interfaces to the web should be mocked @@ -244,21 +244,9 @@ async def test_url_id( url="example.com", source=URLSource.COLLECTOR, trailing_slash=False, - status=URLStatus.OK ) return await db_data_creator.adb_client.add(url, return_id=True) -@pytest_asyncio.fixture -async def test_url_id_2( - db_data_creator: DBDataCreator, -) -> int: - url = URL( - url="example.com/2", - source=URLSource.COLLECTOR, - trailing_slash=False, - status=URLStatus.OK - ) - return await db_data_creator.adb_client.add(url, return_id=True) @pytest_asyncio.fixture diff --git a/tests/automated/integration/core/async_/conclude_task/test_error.py b/tests/automated/integration/core/async_/conclude_task/test_error.py index 1a31b87e..a747aa3a 100644 --- a/tests/automated/integration/core/async_/conclude_task/test_error.py +++ b/tests/automated/integration/core/async_/conclude_task/test_error.py @@ -1,6 +1,5 @@ import pytest -from src.core.enums import BatchStatus from src.core.tasks.url.enums import TaskOperatorOutcome from src.db.models.impl.task.enums import TaskStatus from tests.automated.integration.core.async_.conclude_task.helpers import setup_run_info diff --git a/tests/automated/integration/core/async_/conclude_task/test_success.py b/tests/automated/integration/core/async_/conclude_task/test_success.py index 03cc5b52..eb0e8988 100644 --- a/tests/automated/integration/core/async_/conclude_task/test_success.py +++ b/tests/automated/integration/core/async_/conclude_task/test_success.py @@ -1,6 +1,5 @@ import pytest -from src.core.enums import BatchStatus from src.core.tasks.url.enums import TaskOperatorOutcome from src.db.models.impl.task.enums import TaskStatus from tests.automated.integration.core.async_.conclude_task.helpers import setup_run_info diff --git 
a/tests/automated/integration/core/async_/run_task/test_break_loop.py b/tests/automated/integration/core/async_/run_task/test_break_loop.py index 71b5704f..0235bc08 100644 --- a/tests/automated/integration/core/async_/run_task/test_break_loop.py +++ b/tests/automated/integration/core/async_/run_task/test_break_loop.py @@ -4,10 +4,10 @@ import pytest from src.core.tasks.base.run_info import TaskOperatorRunInfo +from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.models.entry import URLTaskEntry from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.enums import TaskType -from src.core.tasks.url.enums import TaskOperatorOutcome from tests.automated.integration.core.async_.helpers import setup_async_core from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/core/async_/run_task/test_prereq_met.py b/tests/automated/integration/core/async_/run_task/test_prereq_met.py index e5425fd9..8d68034f 100644 --- a/tests/automated/integration/core/async_/run_task/test_prereq_met.py +++ b/tests/automated/integration/core/async_/run_task/test_prereq_met.py @@ -3,13 +3,11 @@ import pytest -from src.core.enums import BatchStatus from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.models.entry import URLTaskEntry from src.core.tasks.url.operators.base import URLTaskOperatorBase from src.db.enums import TaskType -from src.db.models.impl.task.core import Task from tests.automated.integration.core.async_.helpers import setup_async_core from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/db/client/annotate_url/test_agency_not_in_db.py b/tests/automated/integration/db/client/annotate_url/test_agency_not_in_db.py index c419fb70..a91873a7 100644 --- a/tests/automated/integration/db/client/annotate_url/test_agency_not_in_db.py +++ 
b/tests/automated/integration/db/client/annotate_url/test_agency_not_in_db.py @@ -2,8 +2,8 @@ from src.db.constants import PLACEHOLDER_AGENCY_NAME from src.db.models.impl.agency.sqlalchemy import Agency -from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency @pytest.mark.asyncio diff --git a/tests/automated/integration/db/client/approve_url/test_basic.py b/tests/automated/integration/db/client/approve_url/test_basic.py index f090a4ea..76150283 100644 --- a/tests/automated/integration/db/client/approve_url/test_basic.py +++ b/tests/automated/integration/db/client/approve_url/test_basic.py @@ -1,7 +1,6 @@ import pytest from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency @@ -9,15 +8,14 @@ from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.models.impl.url.reviewing_user import ReviewingUserURL -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @pytest.mark.asyncio async def test_approve_url_basic(db_data_creator: DBDataCreator): setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=db_data_creator, - annotation_count=3, include_user_annotations=True ) url_mapping = setup_info.url_mapping @@ -43,7 +41,6 @@ async def test_approve_url_basic(db_data_creator: DBDataCreator): assert len(urls) == 1 url = urls[0] assert url.id == 
url_mapping.url_id - assert url.status == URLStatus.OK assert url.name == "Test Name" assert url.description == "Test Description" diff --git a/tests/automated/integration/db/client/approve_url/test_error.py b/tests/automated/integration/db/client/approve_url/test_error.py index 352e737a..c8e33547 100644 --- a/tests/automated/integration/db/client/approve_url/test_error.py +++ b/tests/automated/integration/db/client/approve_url/test_error.py @@ -2,16 +2,14 @@ from starlette.exceptions import HTTPException from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo -from src.core.enums import RecordType -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @pytest.mark.asyncio async def test_approval_url_error(db_data_creator: DBDataCreator): setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=db_data_creator, - annotation_count=3, include_user_annotations=True, include_miscellaneous_metadata=False ) diff --git a/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py b/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py index 86d4a3ee..c32441f3 100644 --- a/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py +++ b/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py @@ -2,8 +2,8 @@ from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.core.enums import SuggestionType -from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation @pytest.mark.asyncio diff --git 
a/tests/automated/integration/db/structure/test_updated_at.py b/tests/automated/integration/db/structure/test_updated_at.py index 0a4c18a4..d65c44c3 100644 --- a/tests/automated/integration/db/structure/test_updated_at.py +++ b/tests/automated/integration/db/structure/test_updated_at.py @@ -1,9 +1,7 @@ -import asyncio from datetime import datetime import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.url.core.pydantic.upsert import URLUpsertModel from src.db.models.impl.url.core.sqlalchemy import URL from tests.helpers.data_creator.core import DBDataCreator @@ -14,7 +12,6 @@ async def test_updated_at(db_data_creator: DBDataCreator): _ = await db_data_creator.create_urls( count=1, - status=URLStatus.OK ) urls: list[URL] = await db_data_creator.adb_client.get_all(URL) diff --git a/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py b/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py index 16c30869..2abab495 100644 --- a/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py +++ b/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py @@ -3,6 +3,7 @@ from src.api.endpoints.data_source.get.response import DataSourceGetResponse from tests.automated.integration.readonly.helper import ReadOnlyTestHelper + @pytest.mark.asyncio async def test_get_by_id(readonly_helper: ReadOnlyTestHelper): raw_json: dict = readonly_helper.api_test_helper.request_validator.get_v3( diff --git a/tests/automated/integration/readonly/setup/annotations.py b/tests/automated/integration/readonly/setup/annotations.py index b07bbd9f..6829e714 100644 --- a/tests/automated/integration/readonly/setup/annotations.py +++ b/tests/automated/integration/readonly/setup/annotations.py @@ -1,13 +1,13 @@ from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser +from 
src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement +from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser +from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion async def add_full_data_sources_annotations( @@ -17,7 +17,7 @@ async def add_full_data_sources_annotations( location_id: int, adb_client: AsyncDatabaseClient ) -> None: - name_suggestion = URLNameSuggestion( + name_suggestion = AnnotationNameSuggestion( url_id=url_id, suggestion="Name suggestion", source=NameSuggestionSource.USER @@ -26,26 +26,26 @@ async def add_full_data_sources_annotations( name_suggestion, return_id=True ) - url_type_suggestion = UserURLTypeSuggestion( + url_type_suggestion = AnnotationURLTypeUser( url_id=url_id, user_id=user_id, type=URLType.DATA_SOURCE ) - record_type_suggestion = UserRecordTypeSuggestion( + record_type_suggestion = AnnotationRecordTypeUser( user_id=user_id, url_id=url_id, record_type=RecordType.RECORDS_REQUEST_INFO.value ) - 
user_name_suggestion = LinkUserNameSuggestion( + user_name_suggestion = AnnotationNameUserEndorsement( user_id=user_id, suggestion_id=name_suggestion_id, ) - agency_suggestion = UserURLAgencySuggestion( + agency_suggestion = AnnotationAgencyUser( agency_id=agency_id, url_id=url_id, user_id=user_id, ) - location_suggestion = UserLocationSuggestion( + location_suggestion = AnnotationLocationUser( location_id=location_id, url_id=url_id, user_id=user_id, @@ -64,7 +64,7 @@ async def add_minimal_not_relevant_annotation( user_id: int, adb_client: AsyncDatabaseClient ) -> None: - url_type_suggestion = UserURLTypeSuggestion( + url_type_suggestion = AnnotationURLTypeUser( url_id=url_id, user_id=user_id, type=URLType.NOT_RELEVANT diff --git a/tests/automated/integration/readonly/setup/data_source.py b/tests/automated/integration/readonly/setup/data_source.py index e22929ee..d5984c06 100644 --- a/tests/automated/integration/readonly/setup/data_source.py +++ b/tests/automated/integration/readonly/setup/data_source.py @@ -1,6 +1,5 @@ from datetime import date -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.flag.url_validated.enums import URLType @@ -27,7 +26,6 @@ async def add_maximal_data_source( collector_metadata={ "url": "https://read-only.com/" }, - status=URLStatus.OK, source=URLSource.COLLECTOR, ) url_id: int = await adb_client.add(url, return_id=True) @@ -82,7 +80,6 @@ async def add_minimal_data_source( name="Minimal name", trailing_slash=False, collector_metadata={}, - status=URLStatus.OK, source=URLSource.ROOT_URL, ) url_id: int = await adb_client.add(url, return_id=True) diff --git a/tests/automated/integration/readonly/setup/meta_url.py b/tests/automated/integration/readonly/setup/meta_url.py index 837274bb..d5ea9da4 100644 --- a/tests/automated/integration/readonly/setup/meta_url.py +++ b/tests/automated/integration/readonly/setup/meta_url.py @@ -1,4 +1,3 
@@ -from src.collectors.enums import URLStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.url.core.enums import URLSource @@ -20,7 +19,6 @@ async def add_meta_url( collector_metadata={ "url": "https://read-only-meta-url.com/" }, - status=URLStatus.OK, source=URLSource.REDIRECT, ) url_id: int = await adb_client.add(url, return_id=True) diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py index 1d1085a5..f8fb2351 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/setup.py @@ -1,6 +1,5 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL @@ -38,7 +37,6 @@ async def run(self, session: AsyncSession) -> list[int]: url = URL( url=get_test_url(i), scheme=None, - status=URLStatus.OK, name=name, description=description, source=URLSource.COLLECTOR, diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_no_html_content_not_picked_up.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_no_html_content_not_picked_up.py index 25c4d09d..9c767f71 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_no_html_content_not_picked_up.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_no_html_content_not_picked_up.py @@ -3,7 +3,6 @@ from src.core.enums import RecordType from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator 
-from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse from src.db.client.async_ import AsyncDatabaseClient from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.check import check_not_called from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_not_relevant_picked_up.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_not_relevant_picked_up.py index b4abc0ee..d4c9d4c8 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_not_relevant_picked_up.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_not_relevant_picked_up.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator @@ -9,11 +8,11 @@ from src.db.client.async_ import AsyncDatabaseClient from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.check import check_results_called from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.data import generate_expected_outputs +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ + PushToHuggingFaceTestSetupStatusEnum from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.helper import setup_urls from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.input import \ TestPushToHuggingFaceURLSetupEntryInput -from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ - PushToHuggingFaceTestSetupStatusEnum from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_validated_picked_up.py 
b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_validated_picked_up.py index 4ca89aa1..4ac74f4e 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/test_validated_picked_up.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/test_validated_picked_up.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator @@ -9,11 +8,11 @@ from src.db.client.async_ import AsyncDatabaseClient from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.check import check_results_called from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.data import generate_expected_outputs +from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ + PushToHuggingFaceTestSetupStatusEnum from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.helper import setup_urls from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.models.input import \ TestPushToHuggingFaceURLSetupEntryInput -from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ - PushToHuggingFaceTestSetupStatusEnum from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error diff --git a/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_entry_not_found.py b/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_entry_not_found.py index 8a2157ed..80e6c129 100644 --- a/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_entry_not_found.py +++ b/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_entry_not_found.py @@ -4,8 +4,8 @@ from src.db.client.async_ import AsyncDatabaseClient from 
src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata -from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.scheduled.impl.internet_archives.probe.setup import add_urls +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_happy_path.py b/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_happy_path.py index 90131605..96174e6b 100644 --- a/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_happy_path.py +++ b/tests/automated/integration/tasks/scheduled/impl/internet_archives/probe/test_happy_path.py @@ -6,9 +6,9 @@ from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.external.internet_archives.models.capture import IACapture -from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.scheduled.impl.internet_archives.probe.constants import TEST_URL_1, TEST_URL_2 from tests.automated.integration.tasks.scheduled.impl.internet_archives.probe.setup import add_urls +from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error @pytest.mark.asyncio diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/__init__.py 
b/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/test_core.py b/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/test_core.py new file mode 100644 index 00000000..b95eb102 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_from_ds/user_follows/test_core.py @@ -0,0 +1,115 @@ +from http import HTTPStatus +from unittest.mock import AsyncMock + +import pytest +from pdap_access_manager.models.response import ResponseInfo + +from src.core.tasks.base.run_info import TaskOperatorRunInfo +from src.core.tasks.scheduled.impl.sync_from_ds.impl.follows.core import DSAppSyncUserFollowsGetTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.link.location__user_follow import LinkLocationUserFollow +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.follows.response import SyncFollowGetInnerResponse, SyncFollowGetOuterResponse +from tests.automated.integration.conftest import MOCK_USER_ID +from tests.helpers.asserts import assert_task_run_success +from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo + + +def mock_client( + mock_pdap_client: PDAPClient, + response: list[SyncFollowGetInnerResponse] +) -> None: + mock_pdap_client.access_manager.make_request = AsyncMock( + return_value=ResponseInfo( + status_code=HTTPStatus.OK, + data=SyncFollowGetOuterResponse( + follows=response + ).model_dump(mode='json') + ) + ) + +@pytest.mark.asyncio +async def test_core( + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient, + pittsburgh_locality: LocalityCreationInfo, + 
allegheny_county: CountyCreationInfo, + pennsylvania: USStateCreationInfo +): + operator = DSAppSyncUserFollowsGetTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) + + # Mock client to add 3 new follows + mock_client( + mock_pdap_client, + response=[ + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=pittsburgh_locality.location_id + ), + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=allegheny_county.location_id + ), + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=pennsylvania.location_id + ) + ] + ) + + # # Run Task + run_info: TaskOperatorRunInfo = await operator.run_task() + assert_task_run_success(run_info) + + # confirm three follows added + links: list[LinkLocationUserFollow] = await adb_client_test.get_all(LinkLocationUserFollow) + assert len(links) == 3 + link_tuples = [(link.user_id, link.location_id) for link in links] + assert (MOCK_USER_ID, pittsburgh_locality.location_id) in link_tuples + assert (MOCK_USER_ID, allegheny_county.location_id) in link_tuples + assert (MOCK_USER_ID, pennsylvania.location_id) in link_tuples + + # # Run Task again + run_info: TaskOperatorRunInfo = await operator.run_task() + assert_task_run_success(run_info) + + # # Confirm no new follows added + links: list[LinkLocationUserFollow] = await adb_client_test.get_all(LinkLocationUserFollow) + assert len(links) == 3 + link_tuples = [(link.user_id, link.location_id) for link in links] + assert (MOCK_USER_ID, pittsburgh_locality.location_id) in link_tuples + assert (MOCK_USER_ID, allegheny_county.location_id) in link_tuples + assert (MOCK_USER_ID, pennsylvania.location_id) in link_tuples + + + # Mock client to add only two of the follows + mock_client( + mock_pdap_client, + response=[ + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=pittsburgh_locality.location_id + ), + SyncFollowGetInnerResponse( + user_id=MOCK_USER_ID, + location_id=allegheny_county.location_id + ), + ] + ) + 
+ # # Run Task again + run_info: TaskOperatorRunInfo = await operator.run_task() + assert_task_run_success(run_info) + # Confirm one of the follows is removed + + links: list[LinkLocationUserFollow] = await adb_client_test.get_all(LinkLocationUserFollow) + assert len(links) == 2 + link_tuples = [(link.user_id, link.location_id) for link in links] + assert (MOCK_USER_ID, pittsburgh_locality.location_id) in link_tuples + assert (MOCK_USER_ID, allegheny_county.location_id) in link_tuples + diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py index fa31dc40..2e57e042 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py @@ -5,7 +5,6 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.external.pdap.client import PDAPClient -from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest, AddDataSourcesInnerRequest from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel, \ @@ -79,7 +78,6 @@ async def test_add( assert content.access_notes is None assert content.access_types == [] assert content.data_portal_type_other is None - assert content.url_status == DataSourcesURLStatus.OK assert content.agency_ids == [test_agency_id] diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py index 94273019..6d52afc2 100644 --- 
a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py @@ -1,7 +1,5 @@ from datetime import date -from sqlalchemy import update - from src.api.shared.models.message_response import MessageResponse from src.core.enums import RecordType from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py index e63e1496..dcdfb56b 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py @@ -22,6 +22,11 @@ async def test_add( mock_pdap_client: PDAPClient, test_agency_id: int ): + await db_data_creator.create_web_metadata( + url_ids=[test_url_meta_url_id] + ) + + await db_data_creator.adb_client.refresh_materialized_views() operator = DSAppSyncMetaURLsAddTaskOperator( adb_client=adb_client_test, pdap_client=mock_pdap_client @@ -46,7 +51,6 @@ async def test_add( # Run task and confirm runs without error await run_task_and_confirm_success(operator) - # Confirm expected method was called with expected parameters request: AddMetaURLsOuterRequest = extract_and_validate_sync_request( mock_pdap_client, diff --git a/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py b/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py deleted file mode 100644 index 6b06fe31..00000000 --- a/tests/automated/integration/tasks/scheduled/impl/update_url_status/test_core.py +++ /dev/null @@ -1,77 +0,0 @@ -import pytest -from sqlalchemy import update - -from src.collectors.enums import URLStatus -from 
src.core.tasks.scheduled.impl.update_url_status.operator import UpdateURLStatusOperator -from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_update_url_status_task( - test_url_data_source_id: int, - test_url_meta_url_id: int, - adb_client_test: AsyncDatabaseClient, - db_data_creator: DBDataCreator -): - - # Create Operator - operator = UpdateURLStatusOperator( - adb_client=adb_client_test, - ) - - # Add web metadata to URLs - ## Data Source URL: Add 404 - await db_data_creator.create_web_metadata( - url_ids=[test_url_data_source_id], - status_code=404 - ) - - ## Meta URL: Add 200 - await db_data_creator.create_web_metadata( - url_ids=[test_url_meta_url_id], - status_code=200 - ) - - # Run Task - await operator.run_task() - - # Check URLs - urls: list[URL] = await adb_client_test.get_all(URL) - id_status_set_tuple: set[tuple[int, URLStatus]] = { - (url.id, url.status) - for url in urls - } - ## Data Source URL: Status should now be broken - ## Meta URL: Status should be unchanged - assert id_status_set_tuple == { - (test_url_data_source_id, URLStatus.BROKEN), - (test_url_meta_url_id, URLStatus.OK) - } - - # Update Web Metadata for Data Source URL to be 404 - statement = update(URLWebMetadata).where( - URLWebMetadata.url_id == test_url_data_source_id, - ).values( - status_code=200 - ) - await adb_client_test.execute(statement) - - # Run Task - await operator.run_task() - - # Check URLs - urls: list[URL] = await adb_client_test.get_all(URL) - id_status_set_tuple: set[tuple[int, URLStatus]] = { - (url.id, url.status) - for url in urls - } - ## Data Source URL: Status should now be ok - ## Meta URL: Status should be unchanged - assert id_status_set_tuple == { - (test_url_data_source_id, URLStatus.OK), - (test_url_meta_url_id, URLStatus.OK) - 
} - diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/batch_link/test_core.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/batch_link/test_core.py index b39d74ca..e838ee3e 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/batch_link/test_core.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/batch_link/test_core.py @@ -2,10 +2,10 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.impl.link.agency_batch.sqlalchemy import LinkAgencyBatch -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters from tests.helpers.data_creator.core import DBDataCreator @@ -49,14 +49,14 @@ async def test_batch_link_subtask( assert not await operator.meets_task_prerequisites() assert operator._subtask is None - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 2 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = 
subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.BATCH_LINK assert subtask.agencies_found - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all(AgencyIDSubtaskSuggestion) + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all(AnnotationAgencyAutoSuggestion) assert len(suggestions) == 2 assert all(sugg.confidence == 80 for sugg in suggestions) diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/ckan/test_core.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/ckan/test_core.py index 4ec99967..a1ba703f 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/ckan/test_core.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/ckan/test_core.py @@ -4,9 +4,9 @@ from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from tests.helpers.asserts import assert_task_run_success from tests.helpers.data_creator.core import DBDataCreator @@ -57,15 +57,15 @@ async def test_ckan_subtask( assert operator._subtask is None # Verify results - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: 
list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 1 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.CKAN assert subtask.url_id == applicable_url_id subtask_id: int = subtask.id - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all( - AgencyIDSubtaskSuggestion + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all( + AnnotationAgencyAutoSuggestion ) assert len(suggestions) == 2 assert {suggestion.agency_id for suggestion in suggestions} == { diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py index 7575f37e..7e72b733 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py @@ -6,10 +6,10 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping_.simple import SimpleURLMapping +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from 
src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.helpers.data_creator.core import DBDataCreator @@ -113,17 +113,17 @@ async def test_homepage_match( adb_client: AsyncDatabaseClient = db_data_creator.adb_client # Confirm presence of subtasks - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 2 # Confirm both listed as agencies found assert all(subtask.agencies_found for subtask in subtasks) - url_id_to_subtask: dict[int, URLAutoAgencyIDSubtask] = { + url_id_to_subtask: dict[int, AnnotationAgencyAutoSubtask] = { subtask.url_id: subtask for subtask in subtasks } - single_subtask: URLAutoAgencyIDSubtask = url_id_to_subtask[single_url_id] - multi_subtask: URLAutoAgencyIDSubtask = url_id_to_subtask[multi_url_id] + single_subtask: AnnotationAgencyAutoSubtask = url_id_to_subtask[single_url_id] + multi_subtask: AnnotationAgencyAutoSubtask = url_id_to_subtask[multi_url_id] # Check subtasks have expected detail codes assert single_subtask.detail == SubtaskDetailCode.HOMEPAGE_SINGLE_AGENCY @@ -131,16 +131,16 @@ async def test_homepage_match( # Get suggestions - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all(AgencyIDSubtaskSuggestion) + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all(AnnotationAgencyAutoSuggestion) assert len(suggestions) == 3 # Confirm each suggestion properly linked to expected subtask - subtask_id_to_suggestions: dict[int, list[AgencyIDSubtaskSuggestion]] = defaultdict(list) + subtask_id_to_suggestions: dict[int, list[AnnotationAgencyAutoSuggestion]] = defaultdict(list) for suggestion in suggestions: subtask_id_to_suggestions[suggestion.subtask_id].append(suggestion) # Check Single Agency Case 
Suggestion - single_suggestion: AgencyIDSubtaskSuggestion = \ + single_suggestion: AnnotationAgencyAutoSuggestion = \ subtask_id_to_suggestions[single_subtask.id][0] # Check Single Agency Case Suggestion has expected agency assert single_suggestion.agency_id == single_agency_id @@ -148,7 +148,7 @@ async def test_homepage_match( assert single_suggestion.confidence == 95 # Check Multi Agency Case Suggestion - multi_suggestions: list[AgencyIDSubtaskSuggestion] = subtask_id_to_suggestions[multi_subtask.id] + multi_suggestions: list[AnnotationAgencyAutoSuggestion] = subtask_id_to_suggestions[multi_subtask.id] # Check Multi Agency Case Suggestion has expected agencies assert {suggestion.agency_id for suggestion in multi_suggestions} \ == set(multi_agency_ids) diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/muckrock/test_core.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/muckrock/test_core.py index af41354d..aa38b33b 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/muckrock/test_core.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/muckrock/test_core.py @@ -6,9 +6,9 @@ from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import 
AnnotationAgencyAutoSuggestion from tests.helpers.asserts import assert_task_run_success from tests.helpers.data_creator.core import DBDataCreator @@ -93,15 +93,15 @@ async def test_muckrock_subtask( assert operator._subtask is None # Verify results - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 1 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.MUCKROCK assert subtask.url_id == applicable_url_id subtask_id: int = subtask.id - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all( - AgencyIDSubtaskSuggestion + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all( + AnnotationAgencyAutoSuggestion ) assert len(suggestions) == 2 assert {suggestion.agency_id for suggestion in suggestions} == { diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_multi_agency_location.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_multi_agency_location.py index 3da841a1..0df07b79 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_multi_agency_location.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_multi_agency_location.py @@ -2,9 +2,9 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from 
src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo from tests.helpers.run import run_task_and_confirm_success @@ -53,16 +53,16 @@ async def test_multi_agency_location( assert not await operator.meets_task_prerequisites() # Check for presence of subtask - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 1 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH # Confirm subtask lists agencies found assert subtask.agencies_found # Confirm multiple agency suggestions in database - suggestions: list[AgencyIDSubtaskSuggestion] = await adb_client.get_all(AgencyIDSubtaskSuggestion) + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all(AnnotationAgencyAutoSuggestion) assert len(suggestions) == 2 # Confirm confidence of location suggestion is distributed evenly among agency suggestions diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_single_agency_location.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_single_agency_location.py index ecec3071..6e1ef42d 100644 --- 
a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_single_agency_location.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/nlp_location_match/end_to_end/test_single_agency_location.py @@ -2,9 +2,9 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask +from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -59,18 +59,18 @@ async def test_single_agency_location( assert not await operator.meets_task_prerequisites() # Check for presence of subtask - subtasks: list[URLAutoAgencyIDSubtask] = await adb_client.get_all(URLAutoAgencyIDSubtask) + subtasks: list[AnnotationAgencyAutoSubtask] = await adb_client.get_all(AnnotationAgencyAutoSubtask) assert len(subtasks) == 1 - subtask: URLAutoAgencyIDSubtask = subtasks[0] + subtask: AnnotationAgencyAutoSubtask = subtasks[0] assert subtask.type == AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH # Confirm subtask lists agencies found assert subtask.agencies_found # Confirm single agency suggestion in database - suggestions: list[AgencyIDSubtaskSuggestion] = await 
adb_client.get_all(AgencyIDSubtaskSuggestion) + suggestions: list[AnnotationAgencyAutoSuggestion] = await adb_client.get_all(AnnotationAgencyAutoSuggestion) assert len(suggestions) == 1 # Confirm confidence of agency suggestion equal to location suggestion - suggestion: AgencyIDSubtaskSuggestion = suggestions[0] + suggestion: AnnotationAgencyAutoSuggestion = suggestions[0] assert suggestion.confidence == 68 diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py b/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py index 8ace042e..feeba3bd 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/survey/test_survey_flag.py @@ -2,9 +2,10 @@ from src.collectors.enums import CollectorType from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType from tests.helpers.data_creator.core import DBDataCreator + @pytest.mark.asyncio async def test_survey_flag( operator: AgencyIdentificationTaskOperator, diff --git a/tests/automated/integration/tasks/url/impl/auto_name/test_core.py b/tests/automated/integration/tasks/url/impl/auto_name/test_core.py index c0500d99..66c09017 100644 --- a/tests/automated/integration/tasks/url/impl/auto_name/test_core.py +++ b/tests/automated/integration/tasks/url/impl/auto_name/test_core.py @@ -1,8 +1,8 @@ import pytest from src.core.tasks.url.operators.auto_name.core import AutoNameURLTaskOperator -from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.annotation.name.suggestion.enums import 
NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.run import run_task_and_confirm_success @@ -31,9 +31,9 @@ async def test_core( assert not await operator.meets_task_prerequisites() # Confirm suggestion was added - suggestions: list[URLNameSuggestion] = await db_data_creator.adb_client.get_all(URLNameSuggestion) + suggestions: list[AnnotationNameSuggestion] = await db_data_creator.adb_client.get_all(AnnotationNameSuggestion) assert len(suggestions) == 1 - suggestion: URLNameSuggestion = suggestions[0] + suggestion: AnnotationNameSuggestion = suggestions[0] assert suggestion.url_id == url_id assert suggestion.suggestion == "test html content" assert suggestion.source == NameSuggestionSource.HTML_METADATA_TITLE \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py b/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py index 3f4873f4..c9236f6c 100644 --- a/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py +++ b/tests/automated/integration/tasks/url/impl/auto_relevant/test_task.py @@ -1,11 +1,7 @@ -from collections import Counter - import pytest -from src.collectors.enums import URLStatus from src.core.tasks.url.operators.auto_relevant.core import URLAutoRelevantTaskOperator -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.url_type.auto.sqlalchemy import AutoRelevantSuggestion +from src.db.models.impl.annotation.url_type.auto.sqlalchemy import AnnotationAutoURLType from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_prereqs_met from tests.automated.integration.tasks.url.impl.auto_relevant.setup import setup_operator, setup_urls @@ -31,7 +27,7 @@ async def 
test_url_auto_relevant_task(db_data_creator: DBDataCreator): adb_client = db_data_creator.adb_client # Confirm two annotations were created - suggestions: list[AutoRelevantSuggestion] = await adb_client.get_all(AutoRelevantSuggestion) + suggestions: list[AnnotationAutoURLType] = await adb_client.get_all(AnnotationAutoURLType) assert len(suggestions) == 2 for suggestion in suggestions: assert suggestion.url_id in url_ids diff --git a/tests/automated/integration/tasks/url/impl/html/check/manager.py b/tests/automated/integration/tasks/url/impl/html/check/manager.py deleted file mode 100644 index deb0fa11..00000000 --- a/tests/automated/integration/tasks/url/impl/html/check/manager.py +++ /dev/null @@ -1,68 +0,0 @@ -from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from tests.automated.integration.tasks.url.impl.html.setup.models.record import TestURLHTMLTaskSetupRecord - - -class TestURLHTMLTaskCheckManager: - - def __init__( - self, - adb_client: AsyncDatabaseClient, - records: list[TestURLHTMLTaskSetupRecord] - ): - self.adb_client = adb_client - self.records = records - self._id_to_entry = {record.url_id: record.entry for record in records} - - async def check(self): - await self._check_has_html() - await self._check_scrape_status() - await self._check_has_same_url_status() - await self._check_marked_as_404() - - async def _check_has_html(self) -> None: - urls_with_html = [ - record.url_id - for record in self.records - if record.entry.expected_result.has_html - ] - - compressed_html_list: list[URLCompressedHTML] = await self.adb_client.get_all(URLCompressedHTML) - assert len(compressed_html_list) == len(urls_with_html) - for compressed_html in compressed_html_list: - assert 
compressed_html.url_id in urls_with_html - - async def _check_scrape_status(self) -> None: - urls_with_scrape_status = [ - record.url_id - for record in self.records - if record.entry.expected_result.scrape_status is not None - ] - - url_scrape_info_list: list[URLScrapeInfo] = await self.adb_client.get_all(URLScrapeInfo) - assert len(url_scrape_info_list) == len(urls_with_scrape_status) - for url_scrape_info in url_scrape_info_list: - assert url_scrape_info.url_id in urls_with_scrape_status - entry = self._id_to_entry[url_scrape_info.url_id] - expected_scrape_status = entry.expected_result.scrape_status - assert url_scrape_info.status == expected_scrape_status - - async def _check_has_same_url_status(self): - urls: list[URL] = await self.adb_client.get_all(URL) - for url in urls: - entry = self._id_to_entry[url.id] - if entry.expected_result.web_metadata_status_marked_404: - continue - assert url.status == entry.url_info.status, f"URL {url.url} has outcome {url.status} instead of {entry.url_info.status}" - - async def _check_marked_as_404(self): - web_metadata_list: list[URLWebMetadata] = await self.adb_client.get_all( - URLWebMetadata - ) - for web_metadata in web_metadata_list: - entry = self._id_to_entry[web_metadata.url_id] - if entry.expected_result.web_metadata_status_marked_404: - assert web_metadata.status_code == 404, f"URL {entry.url_info.url} has status code {web_metadata.status_code} instead of 404" diff --git a/tests/automated/integration/tasks/url/impl/html/conftest.py b/tests/automated/integration/tasks/url/impl/html/conftest.py new file mode 100644 index 00000000..b73a93e5 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/conftest.py @@ -0,0 +1,28 @@ +import types + +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser +from src.db.client.async_ import AsyncDatabaseClient +from 
src.external.url_request.dtos.url_response import URLResponseInfo +from tests.automated.integration.tasks.url.impl.html.mocks.methods import mock_parse + + +class _MockURLRequestInterface: + + async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: + return [] + +@pytest.fixture +def operator( + adb_client_test: AsyncDatabaseClient +) -> URLHTMLTaskOperator: + html_parser = HTMLResponseParser() + html_parser.parse = types.MethodType(mock_parse, html_parser) + operator = URLHTMLTaskOperator( + adb_client=adb_client_test, + url_request_interface=_MockURLRequestInterface(), + html_parser=html_parser + ) + return operator \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/methods.py b/tests/automated/integration/tasks/url/impl/html/mocks/methods.py index d6799eea..0e0c5657 100644 --- a/tests/automated/integration/tasks/url/impl/html/mocks/methods.py +++ b/tests/automated/integration/tasks/url/impl/html/mocks/methods.py @@ -1,5 +1,3 @@ -from typing import Optional - from src.core.tasks.url.operators.html.scraper.parser.dtos.response_html import ResponseHTMLInfo @@ -10,6 +8,3 @@ async def mock_parse(self, url: str, html_content: str, content_type: str) -> Re description="fake description", ) - -async def mock_get_from_cache(self, url: str) -> Optional[str]: - return None diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py deleted file mode 100644 index 49e6b1f3..00000000 --- a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/core.py +++ /dev/null @@ -1,11 +0,0 @@ -from src.external.url_request.dtos.url_response import URLResponseInfo -from tests.automated.integration.tasks.url.impl.html.mocks.url_request_interface.setup import setup_url_to_response_info - - -class MockURLRequestInterface: - - def __init__(self): - self._url_to_response_info: 
dict[str, URLResponseInfo] = setup_url_to_response_info() - - async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: - return [self._url_to_response_info[url] for url in urls] \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py b/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py deleted file mode 100644 index c0dbef6a..00000000 --- a/tests/automated/integration/tasks/url/impl/html/mocks/url_request_interface/setup.py +++ /dev/null @@ -1,57 +0,0 @@ -from http import HTTPStatus - -from src.external.url_request.dtos.url_response import URLResponseInfo -from tests.automated.integration.tasks.url.impl.html.setup.data import TEST_ENTRIES -from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry, TestErrorType - - -def _get_success( - entry: TestURLHTMLTaskSetupEntry -) -> bool: - if entry.give_error is not None: - return False - return True - -def get_http_status( - entry: TestURLHTMLTaskSetupEntry -) -> HTTPStatus: - if entry.give_error is None: - return HTTPStatus.OK - if entry.give_error == TestErrorType.HTTP_404: - return HTTPStatus.NOT_FOUND - return HTTPStatus.INTERNAL_SERVER_ERROR - -def _get_content_type( - entry: TestURLHTMLTaskSetupEntry -) -> str | None: - if entry.give_error is not None: - return None - return "text/html" - -def _generate_test_html() -> str: - return """ - - -
-This is an example of HTML content.
- - - """ - -def setup_url_to_response_info( -) -> dict[str, URLResponseInfo]: - d = {} - for entry in TEST_ENTRIES: - response_info = URLResponseInfo( - success=_get_success(entry), - status=get_http_status(entry), - html=_generate_test_html() if _get_success(entry) else None, - content_type=_get_content_type(entry), - exception=None if _get_success(entry) else "Error" - ) - d[entry.url_info.url] = response_info - return d diff --git a/tests/automated/integration/tasks/url/impl/html/setup/data.py b/tests/automated/integration/tasks/url/impl/html/setup/data.py deleted file mode 100644 index a3a43f8b..00000000 --- a/tests/automated/integration/tasks/url/impl/html/setup/data.py +++ /dev/null @@ -1,94 +0,0 @@ -from http import HTTPStatus - -from src.collectors.enums import URLStatus -from src.db.models.impl.url.scrape_info.enums import ScrapeStatus -from tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry, TestURLInfo, \ - TestWebMetadataInfo, ExpectedResult, TestErrorType - -TEST_ENTRIES = [ - # URLs that give 200s should be updated with the appropriate scrape status - # and their html should be stored - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="happy-path.com/pending", - status=URLStatus.OK - ), - web_metadata_info=TestWebMetadataInfo( - accessed=True, - content_type="text/html", - response_code=HTTPStatus.OK, - error_message=None - ), - expected_result=ExpectedResult( - has_html=True, # Test for both compressed HTML and content metadata - scrape_status=ScrapeStatus.SUCCESS - ) - ), - # URLs that give 404s should be updated with the appropriate scrape status - # and their web metadata status should be updated to 404 - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="not-found-path.com/submitted", - status=URLStatus.OK - ), - web_metadata_info=TestWebMetadataInfo( - accessed=True, - content_type="text/html", - response_code=HTTPStatus.OK, - error_message=None - ), - 
give_error=TestErrorType.HTTP_404, - expected_result=ExpectedResult( - has_html=False, - scrape_status=ScrapeStatus.ERROR, - web_metadata_status_marked_404=True - ) - ), - # URLs that give errors should be updated with the appropriate scrape status - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="error-path.com/submitted", - status=URLStatus.OK - ), - web_metadata_info=TestWebMetadataInfo( - accessed=True, - content_type="text/html", - response_code=HTTPStatus.OK, - error_message=None - ), - give_error=TestErrorType.SCRAPER, - expected_result=ExpectedResult( - has_html=False, - scrape_status=ScrapeStatus.ERROR - ) - ), - # URLs with non-200 web metadata should not be processed - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="not-200-path.com/submitted", - status=URLStatus.OK - ), - web_metadata_info=TestWebMetadataInfo( - accessed=True, - content_type="text/html", - response_code=HTTPStatus.PERMANENT_REDIRECT, - error_message=None - ), - expected_result=ExpectedResult( - has_html=False, - scrape_status=None - ) - ), - # URLs with no web metadata should not be processed - TestURLHTMLTaskSetupEntry( - url_info=TestURLInfo( - url="no-web-metadata.com/submitted", - status=URLStatus.OK - ), - web_metadata_info=None, - expected_result=ExpectedResult( - has_html=False, - scrape_status=None - ) - ) -] \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/setup/manager.py b/tests/automated/integration/tasks/url/impl/html/setup/manager.py deleted file mode 100644 index e01f7b6d..00000000 --- a/tests/automated/integration/tasks/url/impl/html/setup/manager.py +++ /dev/null @@ -1,79 +0,0 @@ -import types - -from src.core.enums import RecordType -from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator -from src.core.tasks.url.operators.html.scraper.parser.core import HTMLResponseParser -from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.url.core.enums import URLSource -from 
src.db.models.impl.url.core.pydantic.insert import URLInsertModel -from src.db.models.impl.url.web_metadata.insert import URLWebMetadataPydantic -from tests.automated.integration.tasks.url.impl.html.mocks.methods import mock_parse -from tests.automated.integration.tasks.url.impl.html.mocks.url_request_interface.core import MockURLRequestInterface -from tests.automated.integration.tasks.url.impl.html.setup.data import TEST_ENTRIES -from tests.automated.integration.tasks.url.impl.html.setup.models.record import TestURLHTMLTaskSetupRecord - - -class TestURLHTMLTaskSetupManager: - - def __init__(self, adb_client: AsyncDatabaseClient): - self.adb_client = adb_client - - - async def setup(self) -> list[TestURLHTMLTaskSetupRecord]: - - records = await self._setup_urls() - await self.setup_web_metadata(records) - return records - - async def _setup_urls(self) -> list[TestURLHTMLTaskSetupRecord]: - url_insert_models: list[URLInsertModel] = [] - for entry in TEST_ENTRIES: - url_insert_model = URLInsertModel( - status=entry.url_info.status, - url=entry.url_info.url, - name=f"Test for {entry.url_info.url}", - record_type=RecordType.RESOURCES, - source=URLSource.COLLECTOR, - trailing_slash=False - ) - url_insert_models.append(url_insert_model) - url_ids = await self.adb_client.bulk_insert(url_insert_models, return_ids=True) - - records = [] - for url_id, entry in zip(url_ids, TEST_ENTRIES): - record = TestURLHTMLTaskSetupRecord( - url_id=url_id, - entry=entry - ) - records.append(record) - return records - - async def setup_web_metadata( - self, - records: list[TestURLHTMLTaskSetupRecord] - ) -> None: - models = [] - for record in records: - entry = record.entry - web_metadata_info = entry.web_metadata_info - if web_metadata_info is None: - continue - model = URLWebMetadataPydantic( - url_id=record.url_id, - accessed=web_metadata_info.accessed, - status_code=web_metadata_info.response_code.value, - content_type=web_metadata_info.content_type, - 
error_message=web_metadata_info.error_message - ) - models.append(model) - await self.adb_client.bulk_insert(models) - -async def setup_operator() -> URLHTMLTaskOperator: - html_parser = HTMLResponseParser() - html_parser.parse = types.MethodType(mock_parse, html_parser) - operator = URLHTMLTaskOperator( - adb_client=AsyncDatabaseClient(), - url_request_interface=MockURLRequestInterface(), - html_parser=html_parser - ) - return operator diff --git a/tests/automated/integration/tasks/url/impl/html/setup/models/entry.py b/tests/automated/integration/tasks/url/impl/html/setup/models/entry.py deleted file mode 100644 index 287bb52c..00000000 --- a/tests/automated/integration/tasks/url/impl/html/setup/models/entry.py +++ /dev/null @@ -1,34 +0,0 @@ -from enum import Enum -from http import HTTPStatus - -from pydantic import BaseModel - -from src.collectors.enums import URLStatus -from src.db.models.impl.url.scrape_info.enums import ScrapeStatus - - -class TestErrorType(Enum): - SCRAPER = "scraper" - HTTP_404 = "http-404" - - -class TestWebMetadataInfo(BaseModel): - accessed: bool - content_type: str | None - response_code: HTTPStatus - error_message: str | None - -class TestURLInfo(BaseModel): - url: str - status: URLStatus - -class ExpectedResult(BaseModel): - has_html: bool - scrape_status: ScrapeStatus | None # Does not have scrape info if none - web_metadata_status_marked_404: bool = False - -class TestURLHTMLTaskSetupEntry(BaseModel): - url_info: TestURLInfo - web_metadata_info: TestWebMetadataInfo | None - give_error: TestErrorType | None = None - expected_result: ExpectedResult \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/setup/models/record.py b/tests/automated/integration/tasks/url/impl/html/setup/models/record.py deleted file mode 100644 index 022c9639..00000000 --- a/tests/automated/integration/tasks/url/impl/html/setup/models/record.py +++ /dev/null @@ -1,8 +0,0 @@ -from pydantic import BaseModel - -from 
tests.automated.integration.tasks.url.impl.html.setup.models.entry import TestURLHTMLTaskSetupEntry - - -class TestURLHTMLTaskSetupRecord(BaseModel): - url_id: int - entry: TestURLHTMLTaskSetupEntry \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/test_200.py b/tests/automated/integration/tasks/url/impl/html/test_200.py new file mode 100644 index 00000000..cdfbd2fe --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_200.py @@ -0,0 +1,80 @@ +from http import HTTPStatus + +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.models.impl.url.scrape_info.enums import ScrapeStatus +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.external.url_request.dtos.url_response import URLResponseInfo +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + +MOCK_HTML_CONTENT = """ + + + +This is an example of HTML content.
+ + +""" + +class _MockURLRequestInterface: + + async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: + assert len(urls) == 1 + return [ + URLResponseInfo( + success=True, + status=HTTPStatus.OK, + exception=None, + html=MOCK_HTML_CONTENT, + content_type="text/html" + ) + ] + + +@pytest.mark.asyncio +async def test_200( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs that give 200s should be updated with the appropriate scrape status + and their html should be stored + """ + + await db_data_creator.create_web_metadata( + url_ids=[test_url_id], + status_code=200 + ) + + # Adjust Mock Request Interface to return a 200 with HTML content + operator.url_request_interface = _MockURLRequestInterface() + + await assert_prereqs_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + # Check for the presence of Compressed HTML Data + results: list[URLCompressedHTML] = await adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 1 + assert results[0].url_id == test_url_id + assert results[0].compressed_html is not None + + # Web Metadata should be unchanged + web_metadata: URLWebMetadata = (await adb_client_test.get_all(URLWebMetadata))[0] + assert web_metadata.status_code == 200 + + # Check that URLScrapeInfo is updated + scrape_info: URLScrapeInfo = (await adb_client_test.get_all(URLScrapeInfo))[0] + assert scrape_info.status == ScrapeStatus.SUCCESS \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/test_404.py b/tests/automated/integration/tasks/url/impl/html/test_404.py new file mode 100644 index 00000000..51589277 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_404.py @@ -0,0 +1,66 @@ +from http import HTTPStatus + +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import
AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.models.impl.url.scrape_info.enums import ScrapeStatus +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.external.url_request.dtos.url_response import URLResponseInfo +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + + +class _MockURLRequestInterface: + + async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: + assert len(urls) == 1 + return [ + URLResponseInfo( + success=False, + status=HTTPStatus.NOT_FOUND, + exception="Not Found" + ) + ] + + + +@pytest.mark.asyncio +async def test_404( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs that give 404s should be updated with the appropriate scrape status + and their web metadata status should be updated to 404 + """ + await db_data_creator.create_web_metadata( + url_ids=[test_url_id], + status_code=200 + ) + + + # Adjust Mock Request Interface to return a 404 + operator.url_request_interface = _MockURLRequestInterface() + + await assert_prereqs_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + + # Check for the absence of Compressed HTML Data + results: list[URLCompressedHTML] = await adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 0 + + # Web Metadata status code should be updated to 404 + web_metadata: URLWebMetadata = (await adb_client_test.get_all(URLWebMetadata))[0] + assert web_metadata.status_code == 404 + + # Check that URLScrapeInfo is updated + scrape_info: URLScrapeInfo = (await adb_client_test.get_all(URLScrapeInfo))[0] + assert scrape_info.status == ScrapeStatus.ERROR \ No newline at end of
file diff --git a/tests/automated/integration/tasks/url/impl/html/test_error.py b/tests/automated/integration/tasks/url/impl/html/test_error.py new file mode 100644 index 00000000..1290460f --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_error.py @@ -0,0 +1,63 @@ +from http import HTTPStatus + +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.models.impl.url.scrape_info.enums import ScrapeStatus +from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.external.url_request.dtos.url_response import URLResponseInfo +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + + +class _MockURLRequestInterface: + + async def make_requests_with_html(self, urls: list[str]) -> list[URLResponseInfo]: + assert len(urls) == 1 + return [ + URLResponseInfo( + success=False, + status=HTTPStatus.INTERNAL_SERVER_ERROR, + exception="Mock Exception" + ) + ] + +@pytest.mark.asyncio +async def test_error( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs that give errors should be updated with the appropriate scrape status + """ + await db_data_creator.create_web_metadata( + url_ids=[test_url_id], + status_code=200 + ) + + + # Adjust Mock Request Interface to return a 500 error + operator.url_request_interface = _MockURLRequestInterface() + + await assert_prereqs_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + + # Check for the absence of Compressed HTML Data + results: list[URLCompressedHTML] = await
adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 0 + + # Web Metadata should be unchanged + web_metadata: URLWebMetadata = (await adb_client_test.get_all(URLWebMetadata))[0] + assert web_metadata.status_code == 200 + + # Check that URLScrapeInfo is updated + scrape_info: URLScrapeInfo = (await adb_client_test.get_all(URLScrapeInfo))[0] + assert scrape_info.status == ScrapeStatus.ERROR \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py b/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py new file mode 100644 index 00000000..06442164 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_no_web_metadata.py @@ -0,0 +1,26 @@ +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_task_ran_without_error + + +@pytest.mark.asyncio +async def test_no_web_metadata( + adb_client_test: AsyncDatabaseClient, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs with no web metadata should not be processed + """ + await assert_prereqs_not_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + # Check for the absence of Compressed HTML Data + results: list[URLCompressedHTML] = await adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 0 + diff --git a/tests/automated/integration/tasks/url/impl/html/test_non_200.py b/tests/automated/integration/tasks/url/impl/html/test_non_200.py new file mode 100644 index 00000000..0b80ba86 --- /dev/null +++ b/tests/automated/integration/tasks/url/impl/html/test_non_200.py @@ -0,0 +1,32 @@ +import pytest + +from src.core.tasks.url.operators.html.core import URLHTMLTaskOperator +from 
src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_task_ran_without_error +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_non_200( + adb_client_test: AsyncDatabaseClient, + db_data_creator: DBDataCreator, + operator: URLHTMLTaskOperator, + test_url_id: int +): + """ + URLs with non-200 web metadata should not be processed + """ + await db_data_creator.create_web_metadata( + url_ids=[test_url_id], + status_code=500 + ) + + await assert_prereqs_not_met(operator) + + run_info = await operator.run_task() + assert_task_ran_without_error(run_info) + + # Check for the absence of Compressed HTML Data + results: list[URLCompressedHTML] = await adb_client_test.get_all(URLCompressedHTML) + assert len(results) == 0 \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/html/test_task.py b/tests/automated/integration/tasks/url/impl/html/test_task.py deleted file mode 100644 index e7462e65..00000000 --- a/tests/automated/integration/tasks/url/impl/html/test_task.py +++ /dev/null @@ -1,33 +0,0 @@ -import pytest - -from src.db.client.async_ import AsyncDatabaseClient -from src.db.enums import TaskType -from tests.automated.integration.tasks.url.impl.asserts import assert_prereqs_not_met, assert_prereqs_met, \ - assert_task_ran_without_error -from tests.automated.integration.tasks.url.impl.html.check.manager import TestURLHTMLTaskCheckManager -from tests.automated.integration.tasks.url.impl.html.setup.manager import setup_operator, \ - TestURLHTMLTaskSetupManager - - -@pytest.mark.asyncio -async def test_url_html_task(adb_client_test: AsyncDatabaseClient): - setup = TestURLHTMLTaskSetupManager(adb_client_test) - - operator = await setup_operator() - - # No URLs were created, the prereqs should not be met - await 
assert_prereqs_not_met(operator) - - records = await setup.setup() - await assert_prereqs_met(operator) - - run_info = await operator.run_task() - assert_task_ran_without_error(run_info) - - checker = TestURLHTMLTaskCheckManager( - adb_client=adb_client_test, - records=records - ) - await checker.check() - - await assert_prereqs_not_met(operator) diff --git a/tests/automated/integration/tasks/url/impl/location_identification/subtasks/batch_link/test_core.py b/tests/automated/integration/tasks/url/impl/location_identification/subtasks/batch_link/test_core.py index ab505627..0c5238ae 100644 --- a/tests/automated/integration/tasks/url/impl/location_identification/subtasks/batch_link/test_core.py +++ b/tests/automated/integration/tasks/url/impl/location_identification/subtasks/batch_link/test_core.py @@ -2,10 +2,10 @@ from src.core.tasks.url.operators.location_id.core import LocationIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion from src.db.models.impl.link.location_batch.sqlalchemy import LinkLocationBatch -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters from tests.helpers.data_creator.core import DBDataCreator @@ -51,13 +51,13 @@ async def test_batch_link_subtask( assert not await 
operator.meets_task_prerequisites() assert operator._subtask is None - subtasks: list[AutoLocationIDSubtask] = await adb_client.get_all(AutoLocationIDSubtask) + subtasks: list[AnnotationLocationAutoSubtask] = await adb_client.get_all(AnnotationLocationAutoSubtask) assert len(subtasks) == 2 - subtask: AutoLocationIDSubtask = subtasks[0] + subtask: AnnotationLocationAutoSubtask = subtasks[0] assert subtask.type == LocationIDSubtaskType.BATCH_LINK assert subtask.locations_found - suggestions: list[LocationIDSubtaskSuggestion] = await adb_client.get_all(LocationIDSubtaskSuggestion) + suggestions: list[AnnotationLocationAutoSuggestion] = await adb_client.get_all(AnnotationLocationAutoSuggestion) assert len(suggestions) == 2 assert all(sugg.confidence == 80 for sugg in suggestions) diff --git a/tests/automated/integration/tasks/url/impl/location_identification/subtasks/nlp_location_frequency/end_to_end/test_core.py b/tests/automated/integration/tasks/url/impl/location_identification/subtasks/nlp_location_frequency/end_to_end/test_core.py index f8f0c821..8d8bd7c6 100644 --- a/tests/automated/integration/tasks/url/impl/location_identification/subtasks/nlp_location_frequency/end_to_end/test_core.py +++ b/tests/automated/integration/tasks/url/impl/location_identification/subtasks/nlp_location_frequency/end_to_end/test_core.py @@ -9,11 +9,11 @@ from src.core.tasks.url.operators.location_id.subtasks.models.subtask import AutoLocationIDSubtaskData from src.core.tasks.url.operators.location_id.subtasks.models.suggestion import LocationSuggestion from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import 
AnnotationLocationAutoSuggestion from src.db.models.impl.link.task_url import LinkTaskURL -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.pydantic import AutoLocationIDSubtaskPydantic -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from tests.helpers.asserts import assert_task_run_success from tests.helpers.data_creator.core import DBDataCreator @@ -90,7 +90,7 @@ async def mock_process_inputs( assert {task_link.task_id for task_link in task_links} == {operator._task_id} # Confirm two subtasks were created - subtasks: list[AutoLocationIDSubtask] = await adb_client.get_all(AutoLocationIDSubtask) + subtasks: list[AnnotationLocationAutoSubtask] = await adb_client.get_all(AnnotationLocationAutoSubtask) assert len(subtasks) == 2 assert {subtask.url_id for subtask in subtasks} == set(url_ids) assert {subtask.task_id for subtask in subtasks} == {operator._task_id} @@ -108,7 +108,7 @@ async def mock_process_inputs( assert error_infos[0].error == "Test error" # Confirm two suggestions for happy path URL id - suggestions: list[LocationIDSubtaskSuggestion] = await adb_client.get_all(LocationIDSubtaskSuggestion) + suggestions: list[AnnotationLocationAutoSuggestion] = await adb_client.get_all(AnnotationLocationAutoSuggestion) assert len(suggestions) == 2 # Confirm expected agency ids assert {suggestion.location_id for suggestion in suggestions} == { diff --git a/tests/automated/integration/tasks/url/impl/location_identification/survey/test_survey_flag.py b/tests/automated/integration/tasks/url/impl/location_identification/survey/test_survey_flag.py index 338c604b..a5dca740 100644 --- 
a/tests/automated/integration/tasks/url/impl/location_identification/survey/test_survey_flag.py +++ b/tests/automated/integration/tasks/url/impl/location_identification/survey/test_survey_flag.py @@ -1,7 +1,7 @@ import pytest from src.core.tasks.url.operators.location_id.core import LocationIdentificationTaskOperator -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/tasks/url/impl/probe/check/manager.py b/tests/automated/integration/tasks/url/impl/probe/check/manager.py index 200f428a..10505920 100644 --- a/tests/automated/integration/tasks/url/impl/probe/check/manager.py +++ b/tests/automated/integration/tasks/url/impl/probe/check/manager.py @@ -1,10 +1,10 @@ from sqlalchemy import select -from src.collectors.enums import URLStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL -from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_status.enums import URLStatusViewEnum +from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView class TestURLProbeCheckManager: @@ -18,13 +18,13 @@ def __init__( async def check_url( self, url_id: int, - expected_status: URLStatus + expected_status: URLStatusViewEnum ): - url: URL = await self.adb_client.one_or_none( - statement=select(URL).where(URL.id == url_id) + url: URLStatusMaterializedView = await self.adb_client.one_or_none( + statement=select(URLStatusMaterializedView).where(URLStatusMaterializedView.url_id == url_id) ) assert url is not None - assert url.status == expected_status + assert url.status == expected_status.value async def check_web_metadata( 
self, diff --git a/tests/automated/integration/tasks/url/impl/probe/models/entry.py b/tests/automated/integration/tasks/url/impl/probe/models/entry.py deleted file mode 100644 index 810f40ea..00000000 --- a/tests/automated/integration/tasks/url/impl/probe/models/entry.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel - -from src.collectors.enums import URLStatus -from src.external.url_request.probe.models.wrapper import URLProbeResponseOuterWrapper - - -class TestURLProbeTaskEntry(BaseModel): - url: str - url_status: URLStatus - planned_response: URLProbeResponseOuterWrapper \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py index 85dd71f5..787d0d33 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py @@ -1,8 +1,6 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @@ -32,19 +30,15 @@ async def test_url_probe_task_error( ) ) assert not await operator.meets_task_prerequisites() - url_id: int = await setup_manager.setup_url(URLStatus.OK) + url_id: int = await setup_manager.setup_url() await db_data_creator.create_validated_flags([url_id], validation_type=URLType.DATA_SOURCE) await db_data_creator.create_url_data_sources([url_id]) assert await operator.meets_task_prerequisites() + run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert 
not await operator.meets_task_prerequisites() - await check_manager.check_url( - url_id=url_id, - expected_status=URLStatus.OK - ) - await check_manager.check_web_metadata( url_id=url_id, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py index 31216e23..866e7533 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums import URLType from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager @@ -32,16 +31,13 @@ async def test_url_probe_task_not_found( ) ) assert not await operator.meets_task_prerequisites() - url_id = await setup_manager.setup_url(URLStatus.OK) + url_id = await setup_manager.setup_url() await db_data_creator.create_validated_flags([url_id], validation_type=URLType.NOT_RELEVANT) assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert not await operator.meets_task_prerequisites() - await check_manager.check_url( - url_id=url_id, - expected_status=URLStatus.OK - ) + await check_manager.check_web_metadata( url_id=url_id, status_code=404, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py index ecaec084..dca1349e 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_ok.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from 
tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @@ -28,15 +27,12 @@ async def test_url_probe_task_no_redirect_ok( ) ) assert not await operator.meets_task_prerequisites() - url_id = await setup_manager.setup_url(URLStatus.OK) + url_id = await setup_manager.setup_url() assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) assert not await operator.meets_task_prerequisites() - await check_manager.check_url( - url_id=url_id, - expected_status=URLStatus.OK - ) + await check_manager.check_web_metadata( url_id=url_id, status_code=200, diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py index c3b0c6c4..d628ea53 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_two_urls.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.url.core.sqlalchemy import URL from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager @@ -31,8 +30,8 @@ async def test_two_urls( ] ) assert not await operator.meets_task_prerequisites() - url_id_1 = await setup_manager.setup_url(URLStatus.OK, url=url_1) - url_id_2 = await setup_manager.setup_url(URLStatus.OK, url=url_2) + url_id_1 = await setup_manager.setup_url(url_1) + url_id_2 = await setup_manager.setup_url(url_2) assert await operator.meets_task_prerequisites() run_info = await operator.run_task() 
assert_task_ran_without_error(run_info) diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py index df695021..f7f9cb6e 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/dest_new/test_dest_ok.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @@ -28,13 +27,10 @@ async def test_url_probe_task_redirect_dest_new_ok( dest_error=None ) ) - source_url_id = await setup_manager.setup_url(URLStatus.OK) + source_url_id = await setup_manager.setup_url() run_info = await operator.run_task() assert_task_ran_without_error(run_info) - await check_manager.check_url( - url_id=source_url_id, - expected_status=URLStatus.OK - ) + await check_manager.check_web_metadata( url_id=source_url_id, status_code=301, @@ -43,10 +39,7 @@ async def test_url_probe_task_redirect_dest_new_ok( accessed=True ) dest_url_id = await check_manager.check_redirect(source_url_id) - await check_manager.check_url( - url_id=dest_url_id, - expected_status=URLStatus.OK - ) + await check_manager.check_web_metadata( url_id=dest_url_id, status_code=200, diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py index 7aeeb1f8..92729102 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_dest_exists_in_db.py @@ -1,6 +1,5 @@ import pytest 
-from src.collectors.enums import URLStatus from src.db.models.impl.url.web_metadata.insert import URLWebMetadataPydantic from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager @@ -29,8 +28,8 @@ async def test_url_probe_task_redirect_dest_exists_in_db( dest_error=None ) ) - source_url_id = await setup_manager.setup_url(URLStatus.OK) - dest_url_id = await setup_manager.setup_url(URLStatus.OK, url=TEST_DEST_URL.replace("https://", "")) + source_url_id = await setup_manager.setup_url() + dest_url_id = await setup_manager.setup_url(TEST_DEST_URL.replace("https://", "")) # Add web metadata for destination URL, to prevent it from being pulled web_metadata = URLWebMetadataPydantic( url_id=dest_url_id, @@ -42,14 +41,6 @@ async def test_url_probe_task_redirect_dest_exists_in_db( await setup_manager.adb_client.bulk_insert([web_metadata]) run_info = await operator.run_task() assert_task_ran_without_error(run_info) - await check_manager.check_url( - url_id=source_url_id, - expected_status=URLStatus.OK - ) - await check_manager.check_url( - url_id=dest_url_id, - expected_status=URLStatus.OK - ) await check_manager.check_web_metadata( url_id=source_url_id, status_code=302, diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py index a8cb51f7..cbf59b20 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_functional_equivalent.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.util.models.full_url import FullURL @@ -31,7 +30,7 @@ async 
def test_url_probe_task_functional_equivalent( redirect_url=FullURL(TEST_URL + "/") ) ) - url_id = await setup_manager.setup_url(URLStatus.OK) + url_id = await setup_manager.setup_url() await run_task_and_confirm_success(operator) urls: list[URL] = await setup_manager.adb_client.get_all(URL) diff --git a/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py index 1dcd98d9..e8216f17 100644 --- a/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py +++ b/tests/automated/integration/tasks/url/impl/probe/redirect/test_two_urls_same_dest.py @@ -1,6 +1,5 @@ import pytest -from src.collectors.enums import URLStatus from src.util.models.full_url import FullURL from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager @@ -35,18 +34,11 @@ async def test_url_probe_task_redirect_two_urls_same_dest( ), ] ) - source_url_id_1 = await setup_manager.setup_url(URLStatus.OK) - source_url_id_2 = await setup_manager.setup_url(URLStatus.OK, url="example.com/2") + source_url_id_1 = await setup_manager.setup_url() + source_url_id_2 = await setup_manager.setup_url("example.com/2") run_info = await operator.run_task() assert_task_ran_without_error(run_info) - await check_manager.check_url( - url_id=source_url_id_1, - expected_status=URLStatus.OK - ) - await check_manager.check_url( - url_id=source_url_id_2, - expected_status=URLStatus.OK - ) + redirect_url_id_1 = await check_manager.check_redirect( source_url_id=source_url_id_1 ) diff --git a/tests/automated/integration/tasks/url/impl/probe/setup/manager.py b/tests/automated/integration/tasks/url/impl/probe/setup/manager.py index 44b5bd54..bf65e9f6 100644 --- a/tests/automated/integration/tasks/url/impl/probe/setup/manager.py +++ 
b/tests/automated/integration/tasks/url/impl/probe/setup/manager.py @@ -1,6 +1,5 @@ from typing import cast, Literal -from src.collectors.enums import URLStatus from src.core.tasks.url.operators.probe.core import URLProbeTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.core.pydantic.insert import URLInsertModel @@ -23,12 +22,10 @@ def __init__( async def setup_url( self, - url_status: URLStatus, url: str = TEST_URL ) -> int: url_insert_model = URLInsertModel( url=url, - status=url_status, source=TEST_SOURCE, trailing_slash=False ) diff --git a/tests/automated/integration/tasks/url/impl/root_url/test_two_branches_one_root_in_db_not_flagged.py b/tests/automated/integration/tasks/url/impl/root_url/test_two_branches_one_root_in_db_not_flagged.py index 8a40a476..384966a8 100644 --- a/tests/automated/integration/tasks/url/impl/root_url/test_two_branches_one_root_in_db_not_flagged.py +++ b/tests/automated/integration/tasks/url/impl/root_url/test_two_branches_one_root_in_db_not_flagged.py @@ -1,7 +1,6 @@ import pytest from src.core.tasks.url.operators.root_url.core import URLRootURLTaskOperator -from src.db.models.impl.flag.root_url.pydantic import FlagRootURLPydantic from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL from src.db.models.impl.url.core.enums import URLSource diff --git a/tests/automated/integration/tasks/url/impl/test_example_task.py b/tests/automated/integration/tasks/url/impl/test_example_task.py index 00ec7c34..c54425f7 100644 --- a/tests/automated/integration/tasks/url/impl/test_example_task.py +++ b/tests/automated/integration/tasks/url/impl/test_example_task.py @@ -2,12 +2,13 @@ import pytest -from src.db.enums import TaskType from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.operators.base import URLTaskOperatorBase +from src.db.enums import TaskType from 
src.db.models.impl.link.task_url import LinkTaskURL from tests.helpers.data_creator.core import DBDataCreator + class ExampleTaskOperator( URLTaskOperatorBase, ): diff --git a/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py b/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py index bc3f240d..8a907fdc 100644 --- a/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py +++ b/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py @@ -2,11 +2,11 @@ import pytest -from src.core.tasks.url.operators.misc_metadata.core import URLMiscellaneousMetadataTaskOperator -from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.core.sqlalchemy import URL from src.collectors.enums import CollectorType from src.core.tasks.url.enums import TaskOperatorOutcome +from src.core.tasks.url.operators.misc_metadata.core import URLMiscellaneousMetadataTaskOperator +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from tests.helpers.data_creator.core import DBDataCreator diff --git a/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py b/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py index 57f41ded..84471a70 100644 --- a/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py +++ b/tests/automated/integration/tasks/url/impl/test_url_record_type_task.py @@ -2,14 +2,15 @@ import pytest -from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion +from src.core.enums import RecordType from src.core.tasks.url.enums import TaskOperatorOutcome from src.core.tasks.url.operators.record_type.core import URLRecordTypeTaskOperator -from src.core.enums import RecordType +from 
src.core.tasks.url.operators.record_type.llm_api.record_classifier.deepseek import DeepSeekRecordClassifier +from src.db.enums import TaskType +from src.db.models.impl.annotation.record_type.auto.sqlalchemy import AnnotationAutoRecordType from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError from tests.helpers.data_creator.core import DBDataCreator -from src.core.tasks.url.operators.record_type.llm_api.record_classifier.deepseek import DeepSeekRecordClassifier + @pytest.mark.asyncio async def test_url_record_type_task(db_data_creator: DBDataCreator): @@ -49,7 +50,7 @@ async def test_url_record_type_task(db_data_creator: DBDataCreator): assert task.url_error_count == 1 # Get metadata - suggestions = await db_data_creator.adb_client.get_all(AutoRecordTypeSuggestion) + suggestions = await db_data_creator.adb_client.get_all(AnnotationAutoRecordType) for suggestion in suggestions: assert suggestion.record_type == RecordType.ACCIDENT_REPORTS.value diff --git a/tests/automated/integration/tasks/url/impl/validate/helper.py b/tests/automated/integration/tasks/url/impl/validate/helper.py index 879fbc66..ec9901dd 100644 --- a/tests/automated/integration/tasks/url/impl/validate/helper.py +++ b/tests/automated/integration/tasks/url/impl/validate/helper.py @@ -3,13 +3,13 @@ from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource from src.db.models.impl.flag.auto_validated.sqlalchemy import FlagURLAutoValidated from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from 
src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource from src.db.queries.implementations.anonymous_session import MakeAnonymousSessionQueryBuilder from tests.conftest import db_data_creator from tests.helpers.counter import next_int @@ -132,7 +132,7 @@ async def add_record_type_suggestions( async def add_name_suggestion( self, count: int = 1, - ) -> str: + ) -> int: name = f"Test Validate Task Name" suggestion_id: int = await self.db_data_creator.name_suggestion( url_id=self.url_id, @@ -144,7 +144,7 @@ async def add_name_suggestion( suggestion_id=suggestion_id, user_id=next_int(), ) - return name + return suggestion_id async def check_name(self) -> None: urls: list[URL] = await self.adb_client.get_all(URL) diff --git a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py index 4fe0d444..b17f726e 100644 --- a/tests/automated/integration/tasks/url/impl/validate/test_data_source.py +++ b/tests/automated/integration/tasks/url/impl/validate/test_data_source.py @@ -12,11 +12,12 @@ from src.core.enums import RecordType from src.core.tasks.url.operators.validate.core import AutoValidateURLTaskOperator +from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon +from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon +from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement +from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon +from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency -from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation -from 
src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType -from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType from tests.automated.integration.tasks.url.impl.validate.helper import TestValidateTaskHelper, DEFAULT_RECORD_TYPE from tests.helpers.run import run_task_and_confirm_success @@ -45,7 +46,7 @@ async def test_data_source( assert not await operator.meets_task_prerequisites() - await helper.add_name_suggestion(count=2) + suggestion_id: int = await helper.add_name_suggestion(count=1) assert not await operator.meets_task_prerequisites() @@ -54,31 +55,36 @@ async def test_data_source( session_id_2: UUID = await helper.get_anonymous_session_id() for session_id in [session_id_1, session_id_2]: - anon_url_type = AnonymousAnnotationURLType( + anon_url_type = AnnotationURLTypeAnon( url_type=URLType.DATA_SOURCE, session_id=session_id, url_id=helper.url_id ) - anon_record_type = AnonymousAnnotationRecordType( + anon_record_type = AnnotationRecordTypeAnon( record_type=DEFAULT_RECORD_TYPE, session_id=session_id, url_id=helper.url_id ) - anon_location = AnonymousAnnotationLocation( + anon_location = AnnotationLocationAnon( location_id=helper.location_id, session_id=session_id, url_id=helper.url_id ) - anon_agency = AnonymousAnnotationAgency( + anon_agency = AnnotationAgencyAnon( agency_id=helper.agency_id, session_id=session_id, url_id=helper.url_id ) + anon_name_link = AnnotationNameAnonEndorsement( + suggestion_id=suggestion_id, + session_id=session_id + ) for model in [ anon_url_type, anon_record_type, anon_location, - anon_agency + anon_agency, + anon_name_link ]: await helper.adb_client.add(model) @@ -95,7 +101,7 @@ async def test_data_source( # Add tiebreaker -- a single anonymous vote session_id_3: UUID = await helper.get_anonymous_session_id() - anon_record_type = AnonymousAnnotationRecordType( + anon_record_type = AnnotationRecordTypeAnon( 
record_type=DEFAULT_RECORD_TYPE, session_id=session_id_3, url_id=helper.url_id diff --git a/tests/automated/unit/core/test_core_logger.py b/tests/automated/unit/core/test_core_logger.py index 6c4f0375..01dae052 100644 --- a/tests/automated/unit/core/test_core_logger.py +++ b/tests/automated/unit/core/test_core_logger.py @@ -3,8 +3,8 @@ import pytest -from src.db.models.impl.log.pydantic.info import LogInfo from src.core.logger import AsyncCoreLogger +from src.db.models.impl.log.pydantic.info import LogInfo @pytest.mark.asyncio diff --git a/tests/automated/unit/security_manager/test_security_manager.py b/tests/automated/unit/security_manager/test_security_manager.py index 66399d7f..42ae8e4d 100644 --- a/tests/automated/unit/security_manager/test_security_manager.py +++ b/tests/automated/unit/security_manager/test_security_manager.py @@ -4,9 +4,9 @@ from fastapi import HTTPException from jwt import InvalidTokenError -from src.security.manager import SecurityManager, get_access_info from src.security.dtos.access_info import AccessInfo from src.security.enums import Permissions +from src.security.manager import SecurityManager, get_admin_access_info SECRET_KEY = "test_secret_key" VALID_TOKEN = "valid_token" @@ -64,6 +64,6 @@ def test_check_access_failure(mock_get_secret_key, mock_jwt_decode): def test_get_access_info(mock_get_secret_key, mock_jwt_decode): - access_info = get_access_info(token=VALID_TOKEN) + access_info = get_admin_access_info(token=VALID_TOKEN) assert access_info.user_id == 1 assert Permissions.SOURCE_COLLECTOR in access_info.permissions diff --git a/tests/automated/unit/source_collectors/test_autogoogler_collector.py b/tests/automated/unit/source_collectors/test_autogoogler_collector.py index cc191dc3..e4e617a1 100644 --- a/tests/automated/unit/source_collectors/test_autogoogler_collector.py +++ b/tests/automated/unit/source_collectors/test_autogoogler_collector.py @@ -2,11 +2,11 @@ import pytest -from 
src.collectors.impl.auto_googler.dtos.query_results import GoogleSearchQueryResultsInnerDTO +from src.collectors.impl.auto_googler.collector import AutoGooglerCollector from src.collectors.impl.auto_googler.dtos.input import AutoGooglerInputDTO -from src.db.client.async_ import AsyncDatabaseClient +from src.collectors.impl.auto_googler.dtos.query_results import GoogleSearchQueryResultsInnerDTO from src.core.logger import AsyncCoreLogger -from src.collectors.impl.auto_googler.collector import AutoGooglerCollector +from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo diff --git a/tests/automated/unit/source_collectors/test_common_crawl_collector.py b/tests/automated/unit/source_collectors/test_common_crawl_collector.py index 0a10680f..c76bad38 100644 --- a/tests/automated/unit/source_collectors/test_common_crawl_collector.py +++ b/tests/automated/unit/source_collectors/test_common_crawl_collector.py @@ -2,10 +2,10 @@ import pytest +from src.collectors.impl.common_crawler.collector import CommonCrawlerCollector from src.collectors.impl.common_crawler.input import CommonCrawlerInputDTO -from src.db.client.async_ import AsyncDatabaseClient from src.core.logger import AsyncCoreLogger -from src.collectors.impl.common_crawler.collector import CommonCrawlerCollector +from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo diff --git a/tests/automated/unit/source_collectors/test_example_collector.py b/tests/automated/unit/source_collectors/test_example_collector.py index 632a6293..c99217b0 100644 --- a/tests/automated/unit/source_collectors/test_example_collector.py +++ b/tests/automated/unit/source_collectors/test_example_collector.py @@ -1,9 +1,9 @@ from unittest.mock import AsyncMock -from src.db.client.sync import DatabaseClient -from 
src.collectors.impl.example.dtos.input import ExampleInputDTO from src.collectors.impl.example.core import ExampleCollector +from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.core.logger import AsyncCoreLogger +from src.db.client.sync import DatabaseClient def test_example_collector(): diff --git a/tests/automated/unit/source_collectors/test_muckrock_collectors.py b/tests/automated/unit/source_collectors/test_muckrock_collectors.py index 6c845b8e..009e550a 100644 --- a/tests/automated/unit/source_collectors/test_muckrock_collectors.py +++ b/tests/automated/unit/source_collectors/test_muckrock_collectors.py @@ -4,12 +4,12 @@ import pytest from src.collectors.impl.muckrock.collectors.county.core import MuckrockCountyLevelSearchCollector -from src.collectors.impl.muckrock.collectors.simple.core import MuckrockSimpleSearchCollector -from src.db.client.async_ import AsyncDatabaseClient -from src.core.logger import AsyncCoreLogger from src.collectors.impl.muckrock.collectors.county.dto import MuckrockCountySearchCollectorInputDTO +from src.collectors.impl.muckrock.collectors.simple.core import MuckrockSimpleSearchCollector from src.collectors.impl.muckrock.collectors.simple.dto import MuckrockSimpleSearchCollectorInputDTO from src.collectors.impl.muckrock.fetch_requests.foia import FOIAFetchRequest +from src.core.logger import AsyncCoreLogger +from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo diff --git a/tests/helpers/batch_creation_parameters/core.py b/tests/helpers/batch_creation_parameters/core.py index 4562cbdf..3719dae0 100644 --- a/tests/helpers/batch_creation_parameters/core.py +++ b/tests/helpers/batch_creation_parameters/core.py @@ -1,5 +1,4 @@ import datetime -from typing import Optional from pydantic import BaseModel, model_validator diff --git a/tests/helpers/data_creator/commands/impl/html_data.py 
b/tests/helpers/data_creator/commands/impl/html_data.py index 38ecb4bd..dbfe39f1 100644 --- a/tests/helpers/data_creator/commands/impl/html_data.py +++ b/tests/helpers/data_creator/commands/impl/html_data.py @@ -1,11 +1,9 @@ -from src.db.dtos.url.html_content import URLHTMLContentInfo -from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.dtos.url.raw_html import RawHTMLInfo +from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent from src.db.models.impl.url.scrape_info.enums import ScrapeStatus from src.db.models.impl.url.scrape_info.pydantic import URLScrapeInfoInsertModel from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase -from tests.helpers.data_creator.models.clients import DBDataCreatorClientContainer class HTMLDataCreatorCommand(DBDataCreatorCommandBase): diff --git a/tests/helpers/data_creator/commands/impl/suggestion/agency_confirmed.py b/tests/helpers/data_creator/commands/impl/suggestion/agency_confirmed.py index e096d15e..0a293e71 100644 --- a/tests/helpers/data_creator/commands/impl/suggestion/agency_confirmed.py +++ b/tests/helpers/data_creator/commands/impl/suggestion/agency_confirmed.py @@ -7,6 +7,7 @@ from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.agency import AgencyCommand + @final class AgencyConfirmedSuggestionCommand(DBDataCreatorCommandBase): diff --git a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py index fe54c6f9..e714714d 100644 --- a/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py +++ b/tests/helpers/data_creator/commands/impl/suggestion/auto/agency_/core.py @@ -3,14 +3,14 @@ from typing_extensions import override from src.core.enums import SuggestionType -from 
src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo from src.db.enums import TaskType -from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType -from src.db.models.impl.url.suggestion.agency.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic -from src.db.models.impl.url.suggestion.agency.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic +from src.db.models.impl.annotation.agency.auto.subtask.enum import AutoAgencyIDSubtaskType +from src.db.models.impl.annotation.agency.auto.subtask.pydantic import URLAutoAgencyIDSubtaskPydantic +from src.db.models.impl.annotation.agency.auto.suggestion.pydantic import AgencyIDSubtaskSuggestionPydantic from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.agency import AgencyCommand + @final class AgencyAutoSuggestionsCommand(DBDataCreatorCommandBase): diff --git a/tests/helpers/data_creator/commands/impl/suggestion/auto/relevant.py b/tests/helpers/data_creator/commands/impl/suggestion/auto/relevant.py index d85b5a1b..498f736c 100644 --- a/tests/helpers/data_creator/commands/impl/suggestion/auto/relevant.py +++ b/tests/helpers/data_creator/commands/impl/suggestion/auto/relevant.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput +from src.db.models.impl.annotation.url_type.auto.pydantic.input import AutoRelevancyAnnotationInput from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase diff --git a/tests/helpers/data_creator/commands/impl/urls_/convert.py b/tests/helpers/data_creator/commands/impl/urls_/convert.py index c1e2db31..a7d2bdd1 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/convert.py +++ b/tests/helpers/data_creator/commands/impl/urls_/convert.py @@ -1,25 +1,6 @@ -from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.enums 
import URLType from tests.helpers.batch_creation_parameters.enums import URLCreationEnum - -def convert_url_creation_enum_to_url_status(url_creation_enum: URLCreationEnum) -> URLStatus: - match url_creation_enum: - case URLCreationEnum.OK: - return URLStatus.OK - case URLCreationEnum.SUBMITTED: - return URLStatus.OK - case URLCreationEnum.VALIDATED: - return URLStatus.OK - case URLCreationEnum.NOT_RELEVANT: - return URLStatus.OK - case URLCreationEnum.ERROR: - raise ValueError("Invalid URL Status") - case URLCreationEnum.DUPLICATE: - return URLStatus.DUPLICATE - case _: - raise ValueError(f"Unknown URLCreationEnum: {url_creation_enum}") - def convert_url_creation_enum_to_validated_type( url_creation_enum: URLCreationEnum ) -> URLType: diff --git a/tests/helpers/data_creator/commands/impl/urls_/query.py b/tests/helpers/data_creator/commands/impl/urls_/query.py index 1123af8e..fd40834d 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/query.py +++ b/tests/helpers/data_creator/commands/impl/urls_/query.py @@ -1,13 +1,12 @@ from datetime import datetime -from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource -from tests.helpers.data_creator.commands.impl.urls_.tdo import SubmittedURLInfo from src.db.dtos.url.insert import InsertURLsInfo from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase -from tests.helpers.data_creator.commands.impl.urls_.convert import convert_url_creation_enum_to_url_status +from tests.helpers.data_creator.commands.impl.urls_.tdo import SubmittedURLInfo from tests.helpers.simple_test_data_functions import generate_test_urls @@ -19,7 +18,8 @@ def __init__( url_count: int, collector_metadata: dict | None = None, status: 
URLCreationEnum = URLCreationEnum.OK, - created_at: datetime | None = None + created_at: datetime | None = None, + source: URLSource = URLSource.COLLECTOR ): super().__init__() self.batch_id = batch_id @@ -27,6 +27,7 @@ def __init__( self.collector_metadata = collector_metadata self.status = status self.created_at = created_at + self.source = source async def run(self) -> InsertURLsInfo: raise NotImplementedError @@ -38,14 +39,13 @@ def run_sync(self) -> InsertURLsInfo: url_infos.append( URLInfo( url=url, - status=convert_url_creation_enum_to_url_status(self.status), name="Test Name" if self.status in ( URLCreationEnum.VALIDATED, URLCreationEnum.SUBMITTED, ) else None, collector_metadata=self.collector_metadata, created_at=self.created_at, - source=URLSource.COLLECTOR + source=self.source ) ) diff --git a/tests/helpers/data_creator/commands/impl/urls_/tdo.py b/tests/helpers/data_creator/commands/impl/urls_/tdo.py index a8991dcd..fdb5a1cc 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/tdo.py +++ b/tests/helpers/data_creator/commands/impl/urls_/tdo.py @@ -2,8 +2,6 @@ from pydantic import BaseModel -from src.core.enums import RecordType - class SubmittedURLInfo(BaseModel): url_id: int diff --git a/tests/helpers/data_creator/commands/impl/urls_v2/core.py b/tests/helpers/data_creator/commands/impl/urls_v2/core.py index f7042720..20edd618 100644 --- a/tests/helpers/data_creator/commands/impl/urls_v2/core.py +++ b/tests/helpers/data_creator/commands/impl/urls_v2/core.py @@ -1,16 +1,13 @@ from datetime import datetime from src.db.dtos.url.insert import InsertURLsInfo -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase from tests.helpers.data_creator.commands.impl.annotate import 
AnnotateCommand from tests.helpers.data_creator.commands.impl.html_data import HTMLDataCreatorCommand -from tests.helpers.data_creator.commands.impl.urls_.convert import convert_url_creation_enum_to_validated_type from tests.helpers.data_creator.commands.impl.urls_.query import URLsDBDataCreatorCommand from tests.helpers.data_creator.commands.impl.urls_v2.response import URLsV2Response -from tests.helpers.data_creator.generate import generate_validated_flags from tests.helpers.data_creator.models.creation_info.url import URLCreationInfo diff --git a/tests/helpers/data_creator/commands/impl/urls_v2/response.py b/tests/helpers/data_creator/commands/impl/urls_v2/response.py index 74aa8e20..935785e2 100644 --- a/tests/helpers/data_creator/commands/impl/urls_v2/response.py +++ b/tests/helpers/data_creator/commands/impl/urls_v2/response.py @@ -1,6 +1,5 @@ from pydantic import BaseModel -from src.collectors.enums import URLStatus from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.models.creation_info.url import URLCreationInfo diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index dd08a178..c1e27ae3 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -3,7 +3,7 @@ from typing import Optional, Any from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus, SuggestionType, RecordType from src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO @@ -14,23 +14,23 @@ from src.db.enums import TaskType from src.db.models.impl.agency.enums import AgencyType, JurisdictionType from src.db.models.impl.agency.sqlalchemy import Agency +from 
src.db.models.impl.annotation.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask +from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion +from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser +from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource +from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion +from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement from src.db.models.impl.duplicate.pydantic.insert import DuplicateInsertInfo from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL -from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML -from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType -from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask -from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from 
src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource -from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters @@ -266,6 +266,7 @@ def urls( url_count: int, collector_metadata: dict | None = None, outcome: URLCreationEnum = URLCreationEnum.OK, + source: URLSource = URLSource.COLLECTOR, created_at: datetime | None = None ) -> InsertURLsInfo: command = URLsDBDataCreatorCommand( @@ -273,6 +274,7 @@ def urls( url_count=url_count, collector_metadata=collector_metadata, status=outcome, + source=source, created_at=created_at ) return self.run_command_sync(command) @@ -437,7 +439,6 @@ async def create_submitted_urls( async def create_urls( self, - status: URLStatus = URLStatus.OK, source: URLSource = URLSource.COLLECTOR, record_type: RecordType | None = RecordType.RESOURCES, collector_metadata: dict | None = None, @@ -447,7 +448,6 @@ async def create_urls( url_mappings: list[SimpleURLMapping] = await create_urls( adb_client=self.adb_client, - status=status, source=source, record_type=record_type, collector_metadata=collector_metadata, @@ -638,7 +638,7 @@ async def add_user_location_suggestion( user_id: int, location_id: int, ): - suggestion = UserLocationSuggestion( + suggestion = AnnotationLocationUser( url_id=url_id, user_id=user_id, location_id=location_id, @@ -654,7 +654,7 @@ async def add_location_suggestion( ) -> None: locations_found: bool = len(location_ids) > 0 task_id: int = await self.task(url_ids=[url_id]) - subtask = AutoLocationIDSubtask( + subtask = AnnotationLocationAutoSubtask( url_id=url_id, type=type_, task_id=task_id, @@ -663,9 +663,9 @@ async def add_location_suggestion( subtask_id: int = await self.adb_client.add(subtask, return_id=True) if not locations_found: return - 
suggestions: list[LocationIDSubtaskSuggestion] = [] + suggestions: list[AnnotationLocationAutoSuggestion] = [] for location_id in location_ids: - suggestion = LocationIDSubtaskSuggestion( + suggestion = AnnotationLocationAutoSuggestion( subtask_id=subtask_id, location_id=location_id, confidence=confidence @@ -695,7 +695,7 @@ async def name_suggestion( ) -> int: if name is None: name = f"Test Name {next_int()}" - suggestion = URLNameSuggestion( + suggestion = AnnotationNameSuggestion( url_id=url_id, source=source, suggestion=name, @@ -707,7 +707,7 @@ async def user_name_endorsement( suggestion_id: int, user_id: int, ): - link = LinkUserNameSuggestion( + link = AnnotationNameUserEndorsement( suggestion_id=suggestion_id, user_id=user_id, ) diff --git a/tests/helpers/data_creator/create.py b/tests/helpers/data_creator/create.py index 57c9f9da..1c2073fd 100644 --- a/tests/helpers/data_creator/create.py +++ b/tests/helpers/data_creator/create.py @@ -1,6 +1,6 @@ from datetime import datetime -from src.collectors.enums import CollectorType, URLStatus +from src.collectors.enums import CollectorType from src.core.enums import BatchStatus, RecordType from src.db import County, Locality, USState from src.db.client.async_ import AsyncDatabaseClient @@ -32,14 +32,12 @@ async def create_batch( async def create_urls( adb_client: AsyncDatabaseClient, - status: URLStatus = URLStatus.OK, source: URLSource = URLSource.COLLECTOR, record_type: RecordType | None = RecordType.RESOURCES, collector_metadata: dict | None = None, count: int = 1 ) -> list[SimpleURLMapping]: urls: list[URLInsertModel] = generate_urls( - status=status, source=source, collector_metadata=collector_metadata, count=count, diff --git a/tests/helpers/data_creator/generate.py b/tests/helpers/data_creator/generate.py index b447888d..aa63b202 100644 --- a/tests/helpers/data_creator/generate.py +++ b/tests/helpers/data_creator/generate.py @@ -1,11 +1,10 @@ from datetime import datetime -from src.collectors.enums import 
URLStatus, CollectorType -from src.core.enums import BatchStatus, RecordType +from src.collectors.enums import CollectorType +from src.core.enums import BatchStatus from src.db.models.impl.batch.pydantic.insert import BatchInsertModel from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.batch_url.pydantic import LinkBatchURLPydantic from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.insert import URLInsertModel @@ -39,7 +38,6 @@ def generate_batch_url_links( ] def generate_urls( - status: URLStatus = URLStatus.OK, source: URLSource = URLSource.COLLECTOR, collector_metadata: dict | None = None, count: int = 1 @@ -50,7 +48,6 @@ def generate_urls( results.append(URLInsertModel( url=f"example.com/{val}", scheme="https", - status=status, source=source, name=f"Example {val}", collector_metadata=collector_metadata, diff --git a/tests/helpers/setup/annotation/core.py b/tests/helpers/setup/annotation/core.py index 70123cb9..10bc67b7 100644 --- a/tests/helpers/setup/annotation/core.py +++ b/tests/helpers/setup/annotation/core.py @@ -1,4 +1,3 @@ -from src.collectors.enums import URLStatus from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.setup.annotation.model import AnnotationSetupInfo diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py index a3a3d42c..20c0f8df 100644 --- a/tests/helpers/setup/final_review/core.py +++ b/tests/helpers/setup/final_review/core.py @@ -1,17 +1,16 @@ -from typing import Optional - from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import 
URLType +from src.db.models.impl.url.core.enums import URLSource from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.setup.final_review.model import FinalReviewSetupInfo async def setup_for_get_next_url_for_final_review( db_data_creator: DBDataCreator, - annotation_count: int | None = None, include_user_annotations: bool = True, - include_miscellaneous_metadata: bool = True + include_miscellaneous_metadata: bool = True, + source: URLSource = URLSource.COLLECTOR ) -> FinalReviewSetupInfo: """ Sets up the database to test the final_review functions @@ -22,7 +21,8 @@ async def setup_for_get_next_url_for_final_review( batch_id = db_data_creator.batch() url_mapping = db_data_creator.urls( batch_id=batch_id, - url_count=1 + url_count=1, + source=source ).url_mappings[0] if include_miscellaneous_metadata: await db_data_creator.url_miscellaneous_metadata(url_id=url_mapping.url_id) diff --git a/tests/helpers/setup/wipe.py b/tests/helpers/setup/wipe.py index f6cd3582..7d4f0672 100644 --- a/tests/helpers/setup/wipe.py +++ b/tests/helpers/setup/wipe.py @@ -1,4 +1,4 @@ -from sqlalchemy import create_engine, Engine +from sqlalchemy import Engine from src.db.models.templates_.base import Base diff --git a/tests/manual/api/test_contributions.py b/tests/manual/api/test_contributions.py index 90d8e8de..6689ffdf 100644 --- a/tests/manual/api/test_contributions.py +++ b/tests/manual/api/test_contributions.py @@ -1,9 +1,9 @@ import pytest -from src.api.endpoints.contributions.leaderboard.query import GetContributionsLeaderboardQueryBuilder from src.api.endpoints.contributions.user.queries.core import GetUserContributionsQueryBuilder from src.db.client.async_ import AsyncDatabaseClient + # 72 = Max # 17 = Josh diff --git a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py index 22203910..6eedb7f0 100644 --- a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py +++ 
b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py @@ -2,9 +2,9 @@ import dotenv -from src.db.models.impl.batch.pydantic.info import BatchInfo from src.collectors.enums import CollectorType from src.core.enums import BatchStatus +from src.db.models.impl.batch.pydantic.info import BatchInfo def test_auto_googler_collector_lifecycle(test_core): diff --git a/tests/manual/core/lifecycle/test_ckan_lifecycle.py b/tests/manual/core/lifecycle/test_ckan_lifecycle.py index 66020a92..85bfca55 100644 --- a/tests/manual/core/lifecycle/test_ckan_lifecycle.py +++ b/tests/manual/core/lifecycle/test_ckan_lifecycle.py @@ -1,8 +1,9 @@ -from src.db.models.impl.batch.pydantic.info import BatchInfo +from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion + from src.collectors import CollectorType -from src.core.enums import BatchStatus from src.collectors.impl.ckan import group_search, package_search, organization_search -from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion +from src.core.enums import BatchStatus +from src.db.models.impl.batch.pydantic.info import BatchInfo def test_ckan_lifecycle(test_core): diff --git a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py index 216638dc..c78a8199 100644 --- a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py +++ b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py @@ -1,9 +1,10 @@ -from src.db.models.impl.batch.pydantic.info import BatchInfo -from src.collectors import CollectorType -from src.core.enums import BatchStatus from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion from test_automated.integration.core.helpers.constants import ALLEGHENY_COUNTY_MUCKROCK_ID, ALLEGHENY_COUNTY_TOWN_NAMES +from src.collectors import CollectorType +from src.core.enums import BatchStatus +from 
src.db.models.impl.batch.pydantic.info import BatchInfo + def test_muckrock_simple_search_collector_lifecycle(test_core): ci = test_core diff --git a/tests/manual/core/tasks/scheduled/ds_app_sync/__init__.py b/tests/manual/core/tasks/scheduled/ds_app_sync/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/__init__.py b/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/__init__.py b/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/test_prereq.py b/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/test_prereq.py new file mode 100644 index 00000000..a14dc917 --- /dev/null +++ b/tests/manual/core/tasks/scheduled/ds_app_sync/data_sources/update/test_prereq.py @@ -0,0 +1,13 @@ +import pytest + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.prereq import \ + DSAppSyncDataSourcesUpdatePrerequisitesQueryBuilder +from src.db.client.async_ import AsyncDatabaseClient + + +@pytest.mark.asyncio +async def test_prereq(adb_client_test: AsyncDatabaseClient): + await adb_client_test.run_query_builder( + DSAppSyncDataSourcesUpdatePrerequisitesQueryBuilder() + ) + diff --git a/tests/manual/core/tasks/scheduled/test_push_to_huggingface.py b/tests/manual/core/tasks/scheduled/test_push_to_huggingface.py index a091ff5c..3a864bae 100644 --- a/tests/manual/core/tasks/scheduled/test_push_to_huggingface.py +++ b/tests/manual/core/tasks/scheduled/test_push_to_huggingface.py @@ -1,8 +1,6 @@ import pytest - from environs import Env -from src.core.env_var_manager import EnvVarManager from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator 
from src.db.client.async_ import AsyncDatabaseClient from src.external.huggingface.hub.client import HuggingFaceHubClient diff --git a/tests/manual/external/huggingface/inference/test_relevancy.py b/tests/manual/external/huggingface/inference/test_relevancy.py index e001d864..abe4296b 100644 --- a/tests/manual/external/huggingface/inference/test_relevancy.py +++ b/tests/manual/external/huggingface/inference/test_relevancy.py @@ -1,12 +1,11 @@ import pytest from aiohttp import ClientSession +from environs import Env from src.external.huggingface.inference.client import HuggingFaceInferenceClient from src.external.huggingface.inference.models.input import BasicInput from tests.manual.external.huggingface.inference.constants import EXAMPLE_WEBSITE -from environs import Env - @pytest.mark.asyncio async def test_huggingface_inference_relevancy_annotation(): diff --git a/tests/manual/external/internet_archive/test_search.py b/tests/manual/external/internet_archive/test_search.py index 930d0304..41dcee1f 100644 --- a/tests/manual/external/internet_archive/test_search.py +++ b/tests/manual/external/internet_archive/test_search.py @@ -2,7 +2,6 @@ from aiohttp import ClientSession from src.external.internet_archives.client import InternetArchivesClient -from src.external.internet_archives.models.capture import IACapture # BASE_URL = "nola.gov/getattachment/NOPD/Policies/Chapter-12-1-Department-Operations-Manual-EFFECTIVE-1-14-18.pdf/" BASE_URL = "example.com" diff --git a/tests/manual/external/pdap/conftest.py b/tests/manual/external/pdap/conftest.py index de386ad7..51c1947c 100644 --- a/tests/manual/external/pdap/conftest.py +++ b/tests/manual/external/pdap/conftest.py @@ -1,7 +1,8 @@ import pytest import pytest_asyncio from aiohttp import ClientSession -from pdap_access_manager import AccessManager +from pdap_access_manager.access_manager.async_ import AccessManagerAsync as AccessManager +from pdap_access_manager.models.auth import AuthInfo from src.external.pdap.client 
import PDAPClient from src.util.helper_functions import get_from_env @@ -15,8 +16,10 @@ async def client_session(): @pytest.fixture def access_manager(client_session): return AccessManager( - email=get_from_env("PDAP_PROD_EMAIL"), - password=get_from_env("PDAP_PROD_PASSWORD"), + auth=AuthInfo( + email=get_from_env("PDAP_PROD_EMAIL"), + password=get_from_env("PDAP_PROD_PASSWORD"), + ), api_key=get_from_env("PDAP_API_KEY", allow_none=True), session=client_session ) @@ -24,8 +27,10 @@ def access_manager(client_session): @pytest.fixture def access_manager_dev(client_session): return AccessManager( - email=get_from_env("PDAP_DEV_EMAIL"), - password=get_from_env("PDAP_DEV_PASSWORD"), + auth=AuthInfo( + email=get_from_env("PDAP_DEV_EMAIL"), + password=get_from_env("PDAP_DEV_PASSWORD"), + ), api_key=get_from_env("PDAP_DEV_API_KEY", allow_none=True), data_sources_url=get_from_env("PDAP_DEV_API_URL"), session=client_session diff --git a/tests/manual/external/pdap/test_check_for_duplicate.py b/tests/manual/external/pdap/test_check_for_duplicate.py deleted file mode 100644 index 25a8bc52..00000000 --- a/tests/manual/external/pdap/test_check_for_duplicate.py +++ /dev/null @@ -1,9 +0,0 @@ -import pytest - - -@pytest.mark.asyncio -async def test_check_for_duplicate(pdap_client): - - response = await pdap_client.is_url_duplicate(url_to_check="example.com") - - print(response) diff --git a/tests/manual/external/pdap/test_get_follows.py b/tests/manual/external/pdap/test_get_follows.py new file mode 100644 index 00000000..08247277 --- /dev/null +++ b/tests/manual/external/pdap/test_get_follows.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.mark.asyncio +async def test_get_user_followed_locations(pdap_client_dev): + response = await pdap_client_dev.get_user_followed_locations() + print(response) \ No newline at end of file diff --git a/tests/manual/external/pdap/test_get_follows_sync.py b/tests/manual/external/pdap/test_get_follows_sync.py new file mode 100644 index 00000000..9d62209b 
--- /dev/null +++ b/tests/manual/external/pdap/test_get_follows_sync.py @@ -0,0 +1,12 @@ +import pytest + +from src.external.pdap.impl.sync.follows.core import GetFollowsRequestBuilder + + +@pytest.mark.asyncio +async def test_get_follows_sync(pdap_client_dev): + + response = await pdap_client_dev.run_request_builder( + GetFollowsRequestBuilder() + ) + print(response) diff --git a/tests/manual/llm_api_logic/test_deepseek_record_classifier.py b/tests/manual/llm_api_logic/test_deepseek_record_classifier.py index f26f2a6f..0ec3ba16 100644 --- a/tests/manual/llm_api_logic/test_deepseek_record_classifier.py +++ b/tests/manual/llm_api_logic/test_deepseek_record_classifier.py @@ -1,7 +1,7 @@ import pytest -from src.db.dtos.url.html_content import URLHTMLContentInfo from src.core.tasks.url.operators.record_type.llm_api.record_classifier.deepseek import DeepSeekRecordClassifier +from src.db.dtos.url.html_content import URLHTMLContentInfo @pytest.mark.asyncio diff --git a/tests/manual/llm_api_logic/test_openai_record_classifier.py b/tests/manual/llm_api_logic/test_openai_record_classifier.py index 3b3ec08b..25208b63 100644 --- a/tests/manual/llm_api_logic/test_openai_record_classifier.py +++ b/tests/manual/llm_api_logic/test_openai_record_classifier.py @@ -1,7 +1,7 @@ import pytest -from src.db.dtos.url.html_content import URLHTMLContentInfo from src.core.tasks.url.operators.record_type.llm_api.record_classifier.openai import OpenAIRecordClassifier +from src.db.dtos.url.html_content import URLHTMLContentInfo @pytest.mark.asyncio diff --git a/tests/manual/source_collectors/test_autogoogler_collector.py b/tests/manual/source_collectors/test_autogoogler_collector.py index 39d1f8e7..e0f609cb 100644 --- a/tests/manual/source_collectors/test_autogoogler_collector.py +++ b/tests/manual/source_collectors/test_autogoogler_collector.py @@ -1,13 +1,14 @@ from unittest.mock import AsyncMock import pytest +from environs import Env +from src.collectors.impl.auto_googler.collector import 
AutoGooglerCollector from src.collectors.impl.auto_googler.dtos.input import AutoGooglerInputDTO from src.core.env_var_manager import EnvVarManager from src.core.logger import AsyncCoreLogger -from src.collectors.impl.auto_googler.collector import AutoGooglerCollector from src.db.client.async_ import AsyncDatabaseClient -from environs import Env + @pytest.mark.asyncio async def test_autogoogler_collector(monkeypatch): diff --git a/tests/manual/source_collectors/test_ckan_collector.py b/tests/manual/source_collectors/test_ckan_collector.py index 9b5edc9f..753c8a30 100644 --- a/tests/manual/source_collectors/test_ckan_collector.py +++ b/tests/manual/source_collectors/test_ckan_collector.py @@ -4,9 +4,8 @@ from marshmallow import Schema, fields from src.collectors.impl.ckan.collector import CKANCollector -from src.core.logger import AsyncCoreLogger -from src.collectors.impl.ckan import collector from src.collectors.impl.ckan.dtos.input import CKANInputDTO +from src.core.logger import AsyncCoreLogger class CKANSchema(Schema): diff --git a/tests/manual/source_collectors/test_common_crawler_collector.py b/tests/manual/source_collectors/test_common_crawler_collector.py index e508c2ac..61e6fdbc 100644 --- a/tests/manual/source_collectors/test_common_crawler_collector.py +++ b/tests/manual/source_collectors/test_common_crawler_collector.py @@ -3,9 +3,8 @@ import pytest from marshmallow import Schema, fields -from src.core.logger import AsyncCoreLogger -from src.collectors.impl.common_crawler import collector from src.collectors.impl.common_crawler import CommonCrawlerInputDTO +from src.core.logger import AsyncCoreLogger class CommonCrawlerSchema(Schema): diff --git a/tests/manual/source_collectors/test_muckrock_collectors.py b/tests/manual/source_collectors/test_muckrock_collectors.py index d8153c6b..0a69cfc0 100644 --- a/tests/manual/source_collectors/test_muckrock_collectors.py +++ b/tests/manual/source_collectors/test_muckrock_collectors.py @@ -3,16 +3,17 @@ import 
pytest from marshmallow import Schema, fields -from src.core.logger import AsyncCoreLogger +from src.collectors.impl import MuckrockSimpleSearchCollector, \ + MuckrockCountyLevelSearchCollector, MuckrockAllFOIARequestsCollector from src.collectors.impl.muckrock.collectors.all_foia.dto import MuckrockAllFOIARequestsCollectorInputDTO from src.collectors.impl.muckrock.collectors.county.dto import MuckrockCountySearchCollectorInputDTO from src.collectors.impl.muckrock.collectors.simple.dto import MuckrockSimpleSearchCollectorInputDTO -from src.collectors.impl import MuckrockSimpleSearchCollector, \ - MuckrockCountyLevelSearchCollector, MuckrockAllFOIARequestsCollector +from src.core.logger import AsyncCoreLogger from src.db.client.async_ import AsyncDatabaseClient from tests.automated.integration.core.helpers.constants import ALLEGHENY_COUNTY_MUCKROCK_ID, \ ALLEGHENY_COUNTY_TOWN_NAMES + class MuckrockURLInfoSchema(Schema): url = fields.String(required=True) metadata = fields.Dict(required=True) diff --git a/tests/manual/unsorted/test_common_crawler_integration.py b/tests/manual/unsorted/test_common_crawler_integration.py index 4b79893a..d458079d 100644 --- a/tests/manual/unsorted/test_common_crawler_integration.py +++ b/tests/manual/unsorted/test_common_crawler_integration.py @@ -1,10 +1,7 @@ import csv -import datetime -import json import os import shutil import tempfile -from unittest.mock import patch import pytest from common_crawler.cache import CommonCrawlerCacheManager