From 9a69a9afdc0551d0139e8e3a1e0038f7ebf4ac12 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Mon, 22 Sep 2025 07:53:10 -0400 Subject: [PATCH] Complete pre-auto validate draft --- ...843b76_update_for_human_agreement_logic.py | 406 ++++++++++++++++++ .../get_next_url_for_user_annotation.py | 80 ---- src/api/endpoints/annotate/agency/get/dto.py | 11 +- .../agency/get/queries/next_for_annotation.py | 118 ----- .../annotate/all/get/queries/core.py | 31 +- .../get/queries/previously_annotated/build.py | 37 -- .../get/queries/previously_annotated/core.py | 22 - .../annotate/all/post/models/request.py | 70 ++- src/api/endpoints/annotate/all/post/query.py | 29 +- .../annotate/dtos/record_type/post.py | 7 - .../annotate/dtos/record_type/response.py | 19 - .../endpoints/annotate/relevance/get/dto.py | 8 - .../endpoints/annotate/relevance/get/query.py | 64 --- .../endpoints/annotate/relevance/post/dto.py | 7 - src/api/endpoints/annotate/routes.py | 108 +---- .../metrics/batches/aggregated/query/core.py | 2 +- .../aggregated/query/rejected/query.py | 4 +- .../batches/breakdown/not_relevant/cte_.py | 4 +- .../aggregated/query/subqueries/rejected.py | 4 +- .../metrics/urls/breakdown/query/core.py | 6 +- .../endpoints/review/approve/query_/core.py | 4 +- src/api/endpoints/review/next/convert.py | 46 +- src/api/endpoints/review/next/core.py | 12 +- src/api/endpoints/review/next/dto.py | 19 +- src/api/endpoints/review/reject/query.py | 8 +- src/core/core.py | 110 +---- src/core/enums.py | 10 - .../impl/huggingface/queries/get/convert.py | 8 +- .../impl/huggingface/queries/get/core.py | 8 +- .../queries/upsert/links/lookup_/links.py | 4 +- .../queries/upsert/meta_urls/add/core.py | 4 +- .../upsert/meta_urls/lookup/response.py | 4 +- .../queries/upsert/meta_urls/update/filter.py | 4 +- .../queries/upsert/meta_urls/update/params.py | 4 +- .../upsert/meta_urls/update/requester.py | 6 +- .../queries/upsert/agency/core.py | 4 +- .../data_sources/queries/upsert/convert.py | 8 +- 
.../queries/upsert/param_manager.py | 4 +- .../queries/ctes/whitelisted_root_urls.py | 4 +- .../operators/submit_approved/queries/cte.py | 4 +- .../tasks/url/operators/validate}/__init__.py | 0 src/core/tasks/url/operators/validate/core.py | 23 + .../operators/validate/queries}/__init__.py | 0 .../url/operators/validate/queries/cte.py | 8 + .../validate/queries/get}/__init__.py | 0 .../operators/validate/queries/get/core.py | 20 + .../validate/queries/prereq}/__init__.py | 0 src/db/client/async_.py | 103 +---- src/db/client/types.py | 4 +- src/db/constants.py | 4 +- src/db/dto_converter.py | 22 +- src/db/enums.py | 1 + .../impl/flag/auto_validated}/__init__.py | 0 .../impl/flag/auto_validated/pydantic.py | 12 + .../impl/flag/auto_validated/sqlalchemy.py | 18 + .../models/impl/flag/url_validated/enums.py | 3 +- .../impl/flag/url_validated/pydantic.py | 4 +- .../impl/flag/url_validated/sqlalchemy.py | 6 +- src/db/models/impl/url/core/sqlalchemy.py | 15 +- .../models/impl/url/suggestion/agency/user.py | 6 +- .../impl/url/suggestion/record_type/user.py | 2 +- .../impl/url/suggestion/relevant/user.py | 21 +- src/db/models/views/meta_url.py | 2 +- src/db/models/views/unvalidated_url.py | 1 + src/db/models/views/url_anno_count.py | 124 ++++++ .../common/annotation_exists_/constants.py | 4 +- .../url_counts/builder.py | 2 +- .../url_counts/cte/not_relevant.py | 4 +- .../core/metrics/urls/aggregated/pending.py | 4 +- src/db/types.py | 4 +- .../api/_helpers/RequestValidator.py | 63 +-- .../agency/test_multiple_auto_suggestions.py | 46 -- .../test_multiple_auto_suggestions_no_html.py | 35 -- .../agency/test_other_user_annotation.py | 44 -- .../agency/test_single_confirmed_agency.py | 22 - .../test_single_unknown_auto_suggestions.py | 45 -- .../agency/test_submit_and_get_next.py | 42 -- .../api/annotate/agency/test_submit_new.py | 38 -- .../api/annotate/all/test_happy_path.py | 28 +- .../annotate/all/test_post_batch_filtering.py | 12 +- 
.../api/annotate/all/test_validation_error.py | 8 +- .../annotate/record_type/test_record_type.py | 166 ------- .../api/annotate/relevancy/test_relevancy.py | 213 --------- .../api/metrics/batches/test_aggregated.py | 6 +- .../api/metrics/batches/test_breakdown.py | 8 +- .../integration/api/metrics/test_backlog.py | 15 +- .../api/metrics/urls/aggregated/test_core.py | 6 +- .../metrics/urls/aggregated/test_pending.py | 13 +- .../metrics/urls/breakdown/test_pending.py | 11 +- .../integration/api/review/conftest.py | 6 +- .../rejection/test_individual_record.py | 4 +- .../api/review/rejection/test_not_relevant.py | 4 +- .../test_approve_and_get_next_source.py | 4 +- .../api/review/test_next_source.py | 10 +- .../annotate_url/test_marked_not_relevant.py | 66 --- .../test_basic.py | 11 +- .../__init__.py | 0 .../test_pending.py | 68 --- .../test_validated.py | 30 -- ...next_url_for_annotation_batch_filtering.py | 29 +- ...get_next_url_for_user_agency_annotation.py | 61 --- ...ext_url_for_user_record_type_annotation.py | 59 --- .../impl/huggingface/setup/queries/convert.py | 8 +- .../scheduled/impl/sync/agency/setup/core.py | 4 +- .../sync/agency/test_ds_url_in_db_not_sync.py | 8 +- .../agency/test_meta_url_in_db_not_sync.py | 6 +- .../agency/test_same_meta_url_diff_agency.py | 6 +- .../test_with_meta_url_not_in_database.py | 4 +- .../impl/sync/data_sources/setup/core.py | 4 +- .../setup/queries/url_/requester.py | 4 +- .../data_sources/setup/queries/url_/url.py | 4 +- .../impl/sync/data_sources/test_db_only.py | 2 +- .../test_meta_url_not_modified.py | 8 +- .../data_sources/test_url_broken_approved.py | 4 +- .../test_url_in_db_overwritten_by_ds.py | 6 +- .../sync/data_sources/test_url_ok_approved.py | 4 +- .../ineligible_cases/test_blacklist.py | 4 +- .../homepage_match/test_happy_path.py | 6 +- .../url/impl/probe/no_redirect/test_error.py | 4 +- .../impl/probe/no_redirect/test_not_found.py | 4 +- .../test_validated_meta_url.py | 4 +- .../relevancy => 
unit/api}/__init__.py | 0 .../unit/api/test_all_annotation_post_info.py | 156 +++++++ .../annotation_info.py | 5 +- .../commands/impl/suggestion/user/relevant.py | 5 +- .../commands/impl/urls_/convert.py | 10 +- tests/helpers/data_creator/core.py | 22 +- tests/helpers/data_creator/create.py | 4 +- tests/helpers/data_creator/generate.py | 4 +- tests/helpers/setup/final_review/core.py | 5 +- 130 files changed, 1181 insertions(+), 2026 deletions(-) create mode 100644 alembic/versions/2025_09_21_0940-8d7208843b76_update_for_human_agreement_logic.py delete mode 100644 src/api/endpoints/annotate/_shared/queries/get_next_url_for_user_annotation.py delete mode 100644 src/api/endpoints/annotate/agency/get/queries/next_for_annotation.py delete mode 100644 src/api/endpoints/annotate/all/get/queries/previously_annotated/build.py delete mode 100644 src/api/endpoints/annotate/all/get/queries/previously_annotated/core.py delete mode 100644 src/api/endpoints/annotate/dtos/record_type/post.py delete mode 100644 src/api/endpoints/annotate/dtos/record_type/response.py delete mode 100644 src/api/endpoints/annotate/relevance/get/query.py delete mode 100644 src/api/endpoints/annotate/relevance/post/dto.py rename src/{api/endpoints/annotate/all/get/queries/previously_annotated => core/tasks/url/operators/validate}/__init__.py (100%) create mode 100644 src/core/tasks/url/operators/validate/core.py rename src/{api/endpoints/annotate/dtos/record_type => core/tasks/url/operators/validate/queries}/__init__.py (100%) create mode 100644 src/core/tasks/url/operators/validate/queries/cte.py rename src/{api/endpoints/annotate/relevance/post => core/tasks/url/operators/validate/queries/get}/__init__.py (100%) create mode 100644 src/core/tasks/url/operators/validate/queries/get/core.py rename {tests/automated/integration/api/annotate/agency => src/core/tasks/url/operators/validate/queries/prereq}/__init__.py (100%) rename {tests/automated/integration/api/annotate/record_type => 
src/db/models/impl/flag/auto_validated}/__init__.py (100%) create mode 100644 src/db/models/impl/flag/auto_validated/pydantic.py create mode 100644 src/db/models/impl/flag/auto_validated/sqlalchemy.py create mode 100644 src/db/models/views/url_anno_count.py delete mode 100644 tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions.py delete mode 100644 tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions_no_html.py delete mode 100644 tests/automated/integration/api/annotate/agency/test_other_user_annotation.py delete mode 100644 tests/automated/integration/api/annotate/agency/test_single_confirmed_agency.py delete mode 100644 tests/automated/integration/api/annotate/agency/test_single_unknown_auto_suggestions.py delete mode 100644 tests/automated/integration/api/annotate/agency/test_submit_and_get_next.py delete mode 100644 tests/automated/integration/api/annotate/agency/test_submit_new.py delete mode 100644 tests/automated/integration/api/annotate/record_type/test_record_type.py delete mode 100644 tests/automated/integration/api/annotate/relevancy/test_relevancy.py delete mode 100644 tests/automated/integration/db/client/annotate_url/test_marked_not_relevant.py delete mode 100644 tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/__init__.py delete mode 100644 tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_pending.py delete mode 100644 tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_validated.py delete mode 100644 tests/automated/integration/db/client/test_get_next_url_for_user_agency_annotation.py delete mode 100644 tests/automated/integration/db/client/test_get_next_url_for_user_record_type_annotation.py rename tests/automated/{integration/api/annotate/relevancy => unit/api}/__init__.py (100%) create mode 100644 tests/automated/unit/api/test_all_annotation_post_info.py diff --git 
a/alembic/versions/2025_09_21_0940-8d7208843b76_update_for_human_agreement_logic.py b/alembic/versions/2025_09_21_0940-8d7208843b76_update_for_human_agreement_logic.py new file mode 100644 index 00000000..08378218 --- /dev/null +++ b/alembic/versions/2025_09_21_0940-8d7208843b76_update_for_human_agreement_logic.py @@ -0,0 +1,406 @@ +"""Update for human agreement logic + +Revision ID: 8d7208843b76 +Revises: 93cbaa3b8e9b +Create Date: 2025-09-21 09:40:36.506827 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import switch_enum_type, url_id_column, created_at_column + +# revision identifiers, used by Alembic. +revision: str = '8d7208843b76' +down_revision: Union[str, None] = '93cbaa3b8e9b' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +AUTO_VALIDATION_TASK_TYPE: str = 'Auto Validate' +URL_TYPE_NAME: str = 'url_type' +VALIDATED_URL_TYPE_NAME: str = 'validated_url_type' +FLAG_URL_VALIDATED_TABLE_NAME: str = 'flag_url_validated' + +USER_RELEVANT_SUGGESTIONS_TABLE_NAME: str = 'user_relevant_suggestions' +USER_URL_TYPE_SUGGESTIONS_TABLE_NAME: str = 'user_url_type_suggestions' + +FLAG_URL_AUTO_VALIDATED_TABLE_NAME: str = 'flag_url_auto_validated' + + +def _create_anno_count_view(): + op.execute(""" + CREATE OR REPLACE VIEW url_annotation_count_view AS + with auto_location_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.auto_location_id_subtasks anno on u.id = anno.url_id + group by u.id +) +, auto_agency_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.url_auto_agency_id_subtasks anno on u.id = anno.url_id + group by u.id +) +, auto_url_type_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.auto_relevant_suggestions anno on u.id = anno.url_id + group by u.id +) +, auto_record_type_count as ( + select + u.id, + 
count(anno.url_id) as cnt + from urls u + inner join public.auto_record_type_suggestions anno on u.id = anno.url_id + group by u.id +) +, user_location_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.user_location_suggestions anno on u.id = anno.url_id + group by u.id +) +, user_agency_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.user_url_agency_suggestions anno on u.id = anno.url_id + group by u.id +) +, user_url_type_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.user_url_type_suggestions anno on u.id = anno.url_id + group by u.id + ) +, user_record_type_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.user_record_type_suggestions anno on u.id = anno.url_id + group by u.id +) +select + u.id as url_id, + coalesce(auto_ag.cnt, 0) as auto_agency_count, + coalesce(auto_loc.cnt, 0) as auto_location_count, + coalesce(auto_rec.cnt, 0) as auto_record_type_count, + coalesce(auto_typ.cnt, 0) as auto_url_type_count, + coalesce(user_ag.cnt, 0) as user_agency_count, + coalesce(user_loc.cnt, 0) as user_location_count, + coalesce(user_rec.cnt, 0) as user_record_type_count, + coalesce(user_typ.cnt, 0) as user_url_type_count, + ( + coalesce(auto_ag.cnt, 0) + + coalesce(auto_loc.cnt, 0) + + coalesce(auto_rec.cnt, 0) + + coalesce(auto_typ.cnt, 0) + + coalesce(user_ag.cnt, 0) + + coalesce(user_loc.cnt, 0) + + coalesce(user_rec.cnt, 0) + + coalesce(user_typ.cnt, 0) + ) as total_anno_count + + from urls u + left join auto_agency_count auto_ag on auto_ag.id = u.id + left join auto_location_count auto_loc on auto_loc.id = u.id + left join auto_record_type_count auto_rec on auto_rec.id = u.id + left join auto_url_type_count auto_typ on auto_typ.id = u.id + left join user_agency_count user_ag on user_ag.id = u.id + left join user_location_count user_loc on user_loc.id = u.id + left join user_record_type_count user_rec on 
user_rec.id = u.id + left join user_url_type_count user_typ on user_typ.id = u.id + + + """) + + +def upgrade() -> None: + _drop_meta_url_view() + _drop_unvalidated_url_view() + + # URL Type + _rename_validated_url_type_to_url_type() + _add_not_found_url_type() + + # suggested Status + _rename_user_relevant_suggestions_to_user_url_type_suggestions() + _rename_suggested_status_column_to_type() + _switch_suggested_status_with_url_type() + _remove_suggested_status_enum() + + _add_flag_url_auto_validated_table() + _add_auto_validate_task() + + _create_anno_count_view() + + + _add_meta_url_view() + _add_unvalidated_url_view() + + +def _remove_suggested_status_enum(): + op.execute(f"DROP TYPE suggested_status") + + +def _add_suggested_status_enum(): + op.execute( + "create type suggested_status as enum " + + "('relevant', 'not relevant', 'individual record', 'broken page/404 not found');" + ) + + +def _drop_anno_count_view(): + op.execute(""" + DROP VIEW IF EXISTS url_annotation_count_view + """) + + +def downgrade() -> None: + _drop_meta_url_view() + _drop_unvalidated_url_view() + _drop_anno_count_view() + + # Suggested Status + _add_suggested_status_enum() + _replace_url_type_with_suggested_status() + _rename_type_column_to_suggested_status() + _rename_user_url_type_suggestions_to_user_relevant_suggestions() + + # URL Type + _remove_not_found_url_type() + _rename_url_type_to_validated_url_type() + + _remove_auto_validate_task() + _remove_flag_url_auto_validated_table() + + + _add_meta_url_view() + _add_unvalidated_url_view() + +def _rename_suggested_status_column_to_type(): + op.alter_column( + table_name=USER_URL_TYPE_SUGGESTIONS_TABLE_NAME, + column_name='suggested_status', + new_column_name='type' + ) + + +def _rename_type_column_to_suggested_status(): + op.alter_column( + table_name=USER_URL_TYPE_SUGGESTIONS_TABLE_NAME, + column_name='type', + new_column_name='suggested_status' + ) + + + + +def _drop_unvalidated_url_view(): + op.execute("DROP VIEW IF EXISTS 
unvalidated_url_view") + + +def _add_unvalidated_url_view(): + op.execute(""" + CREATE OR REPLACE VIEW unvalidated_url_view AS + select + u.id as url_id + from + urls u + left join flag_url_validated fuv + on fuv.url_id = u.id + where + fuv.type is null + """) + + +def _add_meta_url_view(): + op.execute(""" + CREATE OR REPLACE VIEW meta_url_view AS + SELECT + urls.id as url_id + FROM urls + INNER JOIN flag_url_validated fuv on fuv.url_id = urls.id + where fuv.type = 'meta url' + """) + +def _drop_meta_url_view(): + op.execute("DROP VIEW IF EXISTS meta_url_view") + +def _rename_validated_url_type_to_url_type(): + op.execute(f""" + ALTER TYPE {VALIDATED_URL_TYPE_NAME} RENAME TO {URL_TYPE_NAME} + """) + +def _rename_url_type_to_validated_url_type(): + op.execute(f""" + ALTER TYPE {URL_TYPE_NAME} RENAME TO {VALIDATED_URL_TYPE_NAME} + """) + +def _add_not_found_url_type(): + switch_enum_type( + table_name=FLAG_URL_VALIDATED_TABLE_NAME, + column_name='type', + enum_name=URL_TYPE_NAME, + new_enum_values=[ + 'data source', + 'meta url', + 'not relevant', + 'individual record', + 'not found' + ] + ) + +def _remove_not_found_url_type(): + switch_enum_type( + table_name=FLAG_URL_VALIDATED_TABLE_NAME, + column_name='type', + enum_name=URL_TYPE_NAME, + new_enum_values=[ + 'data source', + 'meta url', + 'not relevant', + 'individual record' + ] + ) + + +def _switch_suggested_status_with_url_type(): + op.execute(f""" + ALTER TABLE {USER_URL_TYPE_SUGGESTIONS_TABLE_NAME} + ALTER COLUMN type type {URL_TYPE_NAME} + USING ( + CASE type::text + WHEN 'relevant' THEN 'data source' + WHEN 'broken page/404 not found' THEN 'not found' + ELSE type::text + END + )::{URL_TYPE_NAME} + """) + + + +def _replace_url_type_with_suggested_status(): + op.execute(f""" + ALTER TABLE {USER_URL_TYPE_SUGGESTIONS_TABLE_NAME} + ALTER COLUMN type type suggested_status + USING ( + CASE type::text + WHEN 'data source' THEN 'relevant' + WHEN 'not found' THEN 'broken page/404 not found' + ELSE type::text + END + 
)::suggested_status + + """) + + + + +def _add_flag_url_auto_validated_table(): + op.create_table( + FLAG_URL_AUTO_VALIDATED_TABLE_NAME, + url_id_column(), + created_at_column(), + sa.PrimaryKeyConstraint('url_id') + ) + + + +def _remove_flag_url_auto_validated_table(): + op.drop_table(FLAG_URL_AUTO_VALIDATED_TABLE_NAME) + + + +def _add_auto_validate_task(): + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + 'HTML', + 'Relevancy', + 'Record Type', + 'Agency Identification', + 'Misc Metadata', + 'Submit Approved URLs', + 'Duplicate Detection', + '404 Probe', + 'Sync Agencies', + 'Sync Data Sources', + 'Push to Hugging Face', + 'URL Probe', + 'Populate Backlog Snapshot', + 'Delete Old Logs', + 'Run URL Task Cycles', + 'Root URL', + 'Internet Archives Probe', + 'Internet Archives Archive', + 'Screenshot', + 'Location ID', + AUTO_VALIDATION_TASK_TYPE, + ] + ) + + +def _rename_user_relevant_suggestions_to_user_url_type_suggestions(): + op.rename_table( + old_table_name=USER_RELEVANT_SUGGESTIONS_TABLE_NAME, + new_table_name=USER_URL_TYPE_SUGGESTIONS_TABLE_NAME + ) + + + +def _rename_user_url_type_suggestions_to_user_relevant_suggestions(): + op.rename_table( + old_table_name=USER_URL_TYPE_SUGGESTIONS_TABLE_NAME, + new_table_name=USER_RELEVANT_SUGGESTIONS_TABLE_NAME + ) + + +def _remove_auto_validate_task(): + switch_enum_type( + table_name='tasks', + column_name='task_type', + enum_name='task_type', + new_enum_values=[ + 'HTML', + 'Relevancy', + 'Record Type', + 'Agency Identification', + 'Misc Metadata', + 'Submit Approved URLs', + 'Duplicate Detection', + '404 Probe', + 'Sync Agencies', + 'Sync Data Sources', + 'Push to Hugging Face', + 'URL Probe', + 'Populate Backlog Snapshot', + 'Delete Old Logs', + 'Run URL Task Cycles', + 'Root URL', + 'Internet Archives Probe', + 'Internet Archives Archive', + 'Screenshot', + 'Location ID' + ] + ) + + diff --git 
a/src/api/endpoints/annotate/_shared/queries/get_next_url_for_user_annotation.py b/src/api/endpoints/annotate/_shared/queries/get_next_url_for_user_annotation.py deleted file mode 100644 index 6eed4b07..00000000 --- a/src/api/endpoints/annotate/_shared/queries/get_next_url_for_user_annotation.py +++ /dev/null @@ -1,80 +0,0 @@ -from sqlalchemy import select, not_, exists -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import QueryableAttribute, joinedload - -from src.collectors.enums import URLStatus -from src.core.enums import SuggestedStatus -from src.db.client.types import UserSuggestionModel -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion -from src.db.queries.base.builder import QueryBuilderBase -from src.db.statement_composer import StatementComposer - - -class GetNextURLForUserAnnotationQueryBuilder(QueryBuilderBase): - - def __init__( - self, - user_suggestion_model_to_exclude: UserSuggestionModel, - auto_suggestion_relationship: QueryableAttribute, - batch_id: int | None, - check_if_annotated_not_relevant: bool = False - ): - super().__init__() - self.check_if_annotated_not_relevant = check_if_annotated_not_relevant - self.batch_id = batch_id - self.user_suggestion_model_to_exclude = user_suggestion_model_to_exclude - self.auto_suggestion_relationship = auto_suggestion_relationship - - async def run(self, session: AsyncSession): - query = ( - select( - URL, - ) - .outerjoin( - FlagURLValidated, - FlagURLValidated.url_id == URL.id - ) - ) - - if self.batch_id is not None: - query = ( - query - .join(LinkBatchURL) - .where(LinkBatchURL.batch_id == self.batch_id) - ) - - query = ( - query - .where(FlagURLValidated.url_id.is_(None)) - # URL must not have user suggestion - .where( - 
StatementComposer.user_suggestion_not_exists(self.user_suggestion_model_to_exclude) - ) - ) - - if self.check_if_annotated_not_relevant: - query = query.where( - not_( - exists( - select(UserRelevantSuggestion) - .where( - UserRelevantSuggestion.url_id == URL.id, - UserRelevantSuggestion.suggested_status != SuggestedStatus.RELEVANT.value - ) - ) - ) - ) - - - - query = query.options( - joinedload(self.auto_suggestion_relationship), - joinedload(URL.html_content) - ).limit(1) - - raw_result = await session.execute(query) - - return raw_result.unique().scalars().one_or_none() \ No newline at end of file diff --git a/src/api/endpoints/annotate/agency/get/dto.py b/src/api/endpoints/annotate/agency/get/dto.py index 35288969..a0c06622 100644 --- a/src/api/endpoints/annotate/agency/get/dto.py +++ b/src/api/endpoints/annotate/agency/get/dto.py @@ -13,11 +13,6 @@ class GetNextURLForAgencyAgencyInfo(BaseModel): county: str | None = None locality: str | None = None -class GetNextURLForAgencyAnnotationInnerResponse(AnnotationInnerResponseInfoBase): - agency_suggestions: list[ - GetNextURLForAgencyAgencyInfo - ] - -class GetNextURLForAgencyAnnotationResponse(BaseModel): - next_annotation: GetNextURLForAgencyAnnotationInnerResponse | None - +class AgencySuggestionAndUserCount(BaseModel): + suggestion: GetNextURLForAgencyAgencyInfo + user_count: int \ No newline at end of file diff --git a/src/api/endpoints/annotate/agency/get/queries/next_for_annotation.py b/src/api/endpoints/annotate/agency/get/queries/next_for_annotation.py deleted file mode 100644 index e8fdc6b2..00000000 --- a/src/api/endpoints/annotate/agency/get/queries/next_for_annotation.py +++ /dev/null @@ -1,118 +0,0 @@ -from sqlalchemy import select, exists -from sqlalchemy.ext.asyncio import AsyncSession - -from src.api.endpoints.annotate._shared.queries.get_annotation_batch_info import GetAnnotationBatchInfoQueryBuilder -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAnnotationResponse, \ - 
GetNextURLForAgencyAnnotationInnerResponse -from src.api.endpoints.annotate.agency.get.queries.agency_suggestion_.core import GetAgencySuggestionsQueryBuilder -from src.collectors.enums import URLStatus -from src.core.enums import SuggestedStatus -from src.core.tasks.url.operators.html.scraper.parser.util import convert_to_response_html_info -from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion -from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView -from src.db.queries.base.builder import QueryBuilderBase -from src.db.queries.implementations.core.get.html_content_info import GetHTMLContentInfoQueryBuilder - - -class GetNextURLAgencyForAnnotationQueryBuilder(QueryBuilderBase): - - def __init__( - self, - batch_id: int | None, - user_id: int - ): - super().__init__() - self.batch_id = batch_id - self.user_id = user_id - - async def run( - self, - session: AsyncSession - ) -> GetNextURLForAgencyAnnotationResponse: - """ - Retrieve URL for annotation - The URL must - not be a confirmed URL - not have been annotated by this user - have extant autosuggestions - """ - # Select statement - query = select(URL.id, URL.url) - if self.batch_id is not None: - query = query.join(LinkBatchURL).where(LinkBatchURL.batch_id == self.batch_id) - - # Must not have confirmed agencies - query = query.where( - URL.status == URLStatus.OK.value - ) - - query = ( - query.join( - URLAnnotationFlagsView, - URLAnnotationFlagsView.url_id == URL.id - ) - # Must not have been annotated by a user - .where( - URLAnnotationFlagsView.has_user_agency_suggestion.is_(False), - # Must have extant autosuggestions - 
URLAnnotationFlagsView.has_auto_agency_suggestion.is_(True) - ) - .join(LinkURLAgency, isouter=True) - .where( - ~exists( - select(LinkURLAgency). - where(LinkURLAgency.url_id == URL.id). - correlate(URL) - ) - ) - # Must not have been marked as "Not Relevant" by this user - .join(UserRelevantSuggestion, isouter=True) - .where( - ~exists( - select(UserRelevantSuggestion). - where( - (UserRelevantSuggestion.user_id == self.user_id) & - (UserRelevantSuggestion.url_id == URL.id) & - (UserRelevantSuggestion.suggested_status != SuggestedStatus.RELEVANT.value) - ).correlate(URL) - ) - ) - ).limit(1) - raw_result = await session.execute(query) - results = raw_result.all() - if len(results) == 0: - return GetNextURLForAgencyAnnotationResponse( - next_annotation=None - ) - - result = results[0] - url_id = result[0] - url = result[1] - - agency_suggestions = await GetAgencySuggestionsQueryBuilder(url_id=url_id).run(session) - - # Get HTML content info - html_content_infos = await GetHTMLContentInfoQueryBuilder(url_id).run(session) - response_html_info = convert_to_response_html_info(html_content_infos) - - return GetNextURLForAgencyAnnotationResponse( - next_annotation=GetNextURLForAgencyAnnotationInnerResponse( - url_info=URLMapping( - url=url, - url_id=url_id - ), - html_info=response_html_info, - agency_suggestions=agency_suggestions, - batch_info=await GetAnnotationBatchInfoQueryBuilder( - batch_id=self.batch_id, - models=[ - UserUrlAgencySuggestion, - ] - ).run(session) - ) - ) \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index 615beab2..965b99e5 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -1,4 +1,4 @@ -from sqlalchemy import Select, and_, or_ +from sqlalchemy import Select, exists, select from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import joinedload @@ -9,8 +9,6 @@ 
from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse, \ GetNextURLForAllAnnotationInnerResponse from src.api.endpoints.annotate.all.get.queries.location_.core import GetLocationSuggestionsQueryBuilder -from src.api.endpoints.annotate.all.get.queries.previously_annotated.core import \ - URLPreviouslyAnnotatedByUserCTEContainer from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo from src.collectors.enums import URLStatus from src.db.dto_converter import DTOConverter @@ -20,7 +18,9 @@ from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.models.views.unvalidated_url import UnvalidatedURL +from src.db.models.views.url_anno_count import URLAnnotationCount from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import QueryBuilderBase @@ -40,7 +40,6 @@ async def run( self, session: AsyncSession ) -> GetNextURLForAllAnnotationResponse: - prev_annotated_cte = URLPreviouslyAnnotatedByUserCTEContainer(user_id=self.user_id) query = ( Select(URL) # URL Must be unvalidated @@ -48,16 +47,14 @@ async def run( UnvalidatedURL, UnvalidatedURL.url_id == URL.id ) - # Must not have been previously annotated by user - # TODO (SM422): Remove where conditional on whether it already has user suggestions - .join( - prev_annotated_cte.cte, - prev_annotated_cte.url_id == URL.id - ) .join( URLAnnotationFlagsView, URLAnnotationFlagsView.url_id == URL.id ) + .join( + URLAnnotationCount, + URLAnnotationCount.url_id == URL.id + ) ) if self.batch_id is not None: query = query.join(LinkBatchURL).where(LinkBatchURL.batch_id == self.batch_id) @@ -65,6 +62,14 @@ async 
def run( query .where( URL.status == URLStatus.OK.value, + # Must not have been previously annotated by user + ~exists( + select(UserURLTypeSuggestion.id) + .where( + UserURLTypeSuggestion.url_id == URL.id, + UserURLTypeSuggestion.user_id == self.user_id, + ) + ) ) ) # Add load options @@ -74,8 +79,10 @@ async def run( joinedload(URL.auto_record_type_suggestion), ) - # TODO (SM422): Add order by highest number of suggestions (auto or user), desc - query = query.order_by(URL.id.asc()).limit(1) + query = query.order_by( + URLAnnotationCount.total_anno_count.desc(), + URL.id.asc() + ).limit(1) raw_results = (await session.execute(query)).unique() url: URL | None = raw_results.scalars().one_or_none() if url is None: diff --git a/src/api/endpoints/annotate/all/get/queries/previously_annotated/build.py b/src/api/endpoints/annotate/all/get/queries/previously_annotated/build.py deleted file mode 100644 index 1d54df46..00000000 --- a/src/api/endpoints/annotate/all/get/queries/previously_annotated/build.py +++ /dev/null @@ -1,37 +0,0 @@ -from sqlalchemy import CTE, select, and_, or_ - -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion - - -def build_cte(user_id: int) -> CTE: - query = ( - select( - URL.id - ) - ) - for model in [ - UserLocationSuggestion, - UserRelevantSuggestion, - UserRecordTypeSuggestion, - UserUrlAgencySuggestion - ]: - query = query.outerjoin( - model, - and_( - model.url_id == URL.id, - model.user_id == user_id - ) - ) - query = query.where( - and_( - UserLocationSuggestion.user_id.is_(None), - UserRelevantSuggestion.user_id.is_(None), - UserRecordTypeSuggestion.user_id.is_(None), - 
UserUrlAgencySuggestion.user_id.is_(None) - ) - ) - return query.cte() diff --git a/src/api/endpoints/annotate/all/get/queries/previously_annotated/core.py b/src/api/endpoints/annotate/all/get/queries/previously_annotated/core.py deleted file mode 100644 index 2c91076b..00000000 --- a/src/api/endpoints/annotate/all/get/queries/previously_annotated/core.py +++ /dev/null @@ -1,22 +0,0 @@ -from sqlalchemy import CTE -from sqlalchemy.orm import InstrumentedAttribute - -from src.api.endpoints.annotate.all.get.queries.previously_annotated.build import build_cte - - -class URLPreviouslyAnnotatedByUserCTEContainer: - - def __init__( - self, - user_id: int - ): - self.user_id = user_id - self._cte: CTE = build_cte(user_id=user_id) - - @property - def cte(self) -> CTE: - return self._cte - - @property - def url_id(self) -> InstrumentedAttribute[int]: - return self._cte.c.id \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/post/models/request.py b/src/api/endpoints/annotate/all/post/models/request.py index bd5c0121..e85f2442 100644 --- a/src/api/endpoints/annotate/all/post/models/request.py +++ b/src/api/endpoints/annotate/all/post/models/request.py @@ -1,35 +1,61 @@ -from typing import Optional - from pydantic import BaseModel, model_validator -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.core.enums import RecordType, SuggestedStatus +from src.core.enums import RecordType from src.core.exceptions import FailedValidationException +from src.db.models.impl.flag.url_validated.enums import URLType class AllAnnotationPostInfo(BaseModel): - suggested_status: SuggestedStatus + suggested_status: URLType record_type: RecordType | None = None - agency: URLAgencyAnnotationPostInfo | None = None + agency_ids: list[int] location_ids: list[int] - # TODO (SM422): Break up into multiple validation types @model_validator(mode="after") - def allow_record_type_and_agency_only_if_relevant(self): - suggested_status = 
self.suggested_status - record_type = self.record_type - agency = self.agency + def forbid_record_type_if_meta_url(self): + if self.suggested_status == URLType.META_URL and self.record_type is not None: + raise FailedValidationException("record_type must be None if suggested_status is META_URL") + return self - if suggested_status != SuggestedStatus.RELEVANT: - if record_type is not None: - raise FailedValidationException("record_type must be None if suggested_status is not relevant") + @model_validator(mode="after") + def require_record_type_if_data_source(self): + if self.suggested_status == URLType.DATA_SOURCE and self.record_type is None: + raise FailedValidationException("record_type must be provided if suggested_status is DATA_SOURCE") + return self - if agency is not None: - raise FailedValidationException("agency must be None if suggested_status is not relevant") + @model_validator(mode="after") + def require_location_if_meta_url_or_data_source(self): + if self.suggested_status not in [URLType.META_URL, URLType.DATA_SOURCE]: + return self + if len(self.location_ids) == 0: + raise FailedValidationException("location_ids must be provided if suggested_status is META_URL or DATA_SOURCE") + return self + + @model_validator(mode="after") + def require_agency_id_if_meta_url_or_data_source(self): + if self.suggested_status not in [URLType.META_URL, URLType.DATA_SOURCE]: + return self + if len(self.agency_ids) == 0: + raise FailedValidationException("agencies must be provided if suggested_status is META_URL or DATA_SOURCE") + return self + + @model_validator(mode="after") + def forbid_all_else_if_not_meta_url_or_data_source(self): + if self.suggested_status in [URLType.META_URL, URLType.DATA_SOURCE]: + return self + if self.record_type is not None: + raise FailedValidationException("record_type must be None if suggested_status is not META_URL or DATA_SOURCE") + if len(self.agency_ids) > 0: + raise FailedValidationException("agency_ids must be empty if 
suggested_status is not META_URL or DATA_SOURCE") + if len(self.location_ids) > 0: + raise FailedValidationException("location_ids must be empty if suggested_status is not META_URL or DATA_SOURCE") + return self + + + @model_validator(mode="after") + def deprecate_agency_meta_url_record_type(self): + if self.record_type is None: return self - # Similarly, if relevant, record_type and agency must be provided - if record_type is None: - raise FailedValidationException("record_type must be provided if suggested_status is relevant") - if agency is None: - raise FailedValidationException("agency must be provided if suggested_status is relevant") - return self \ No newline at end of file + if self.record_type == RecordType.CONTACT_INFO_AND_AGENCY_META: + raise FailedValidationException("Contact Info & Agency Meta Record Type is Deprecated.") + return self diff --git a/src/api/endpoints/annotate/all/post/query.py b/src/api/endpoints/annotate/all/post/query.py index 2203b368..c1d35934 100644 --- a/src/api/endpoints/annotate/all/post/query.py +++ b/src/api/endpoints/annotate/all/post/query.py @@ -1,11 +1,11 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.core.enums import SuggestedStatus +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.queries.base.builder import QueryBuilderBase @@ -25,17 +25,18 @@ def __init__( async def run(self, session: AsyncSession) -> None: # Add relevant annotation - # TODO: Modify 
UserRelevantSuggestion to use `URLValidatedType` instead of `SuggestedStatus` - relevant_suggestion = UserRelevantSuggestion( + relevant_suggestion = UserURLTypeSuggestion( url_id=self.url_id, user_id=self.user_id, - suggested_status=self.post_info.suggested_status.value + type=self.post_info.suggested_status ) session.add(relevant_suggestion) # If not relevant, do nothing else - # TODO (SM422): Update to account for change in SuggestedStatus - if not self.post_info.suggested_status == SuggestedStatus.RELEVANT: + if not self.post_info.suggested_status in [ + URLType.META_URL, + URLType.DATA_SOURCE + ]: return locations: list[UserLocationSuggestion] = [] @@ -54,10 +55,10 @@ async def run(self, session: AsyncSession) -> None: ) session.add(record_type_suggestion) - agency_suggestion = UserUrlAgencySuggestion( - url_id=self.url_id, - user_id=self.user_id, - agency_id=self.post_info.agency.suggested_agency, - is_new=self.post_info.agency.is_new - ) - session.add(agency_suggestion) + for agency_id in self.post_info.agency_ids: + agency_suggestion = UserUrlAgencySuggestion( + url_id=self.url_id, + user_id=self.user_id, + agency_id=agency_id, + ) + session.add(agency_suggestion) diff --git a/src/api/endpoints/annotate/dtos/record_type/post.py b/src/api/endpoints/annotate/dtos/record_type/post.py deleted file mode 100644 index a3c7a653..00000000 --- a/src/api/endpoints/annotate/dtos/record_type/post.py +++ /dev/null @@ -1,7 +0,0 @@ -from pydantic import BaseModel - -from src.core.enums import RecordType - - -class RecordTypeAnnotationPostInfo(BaseModel): - record_type: RecordType \ No newline at end of file diff --git a/src/api/endpoints/annotate/dtos/record_type/response.py b/src/api/endpoints/annotate/dtos/record_type/response.py deleted file mode 100644 index 188d6500..00000000 --- a/src/api/endpoints/annotate/dtos/record_type/response.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Optional - -from pydantic import Field, BaseModel - -from 
src.api.endpoints.annotate.dtos.shared.base.response import AnnotationInnerResponseInfoBase -from src.core.enums import RecordType - - -class GetNextRecordTypeAnnotationResponseInfo( - AnnotationInnerResponseInfoBase -): - suggested_record_type: RecordType | None = Field( - title="What record type, if any, the auto-labeler identified the URL as" - ) - -class GetNextRecordTypeAnnotationResponseOuterInfo( - BaseModel -): - next_annotation: GetNextRecordTypeAnnotationResponseInfo | None diff --git a/src/api/endpoints/annotate/relevance/get/dto.py b/src/api/endpoints/annotate/relevance/get/dto.py index 649367f4..8855fdf3 100644 --- a/src/api/endpoints/annotate/relevance/get/dto.py +++ b/src/api/endpoints/annotate/relevance/get/dto.py @@ -15,11 +15,3 @@ class RelevanceAnnotationResponseInfo(BaseModel): model_name: str | None = Field( title="The name of the model that made the annotation" ) - -class GetNextRelevanceAnnotationResponseInfo(AnnotationInnerResponseInfoBase): - annotation: RelevanceAnnotationResponseInfo | None = Field( - title="The auto-labeler's annotation for relevance" - ) - -class GetNextRelevanceAnnotationResponseOuterInfo(BaseModel): - next_annotation: GetNextRelevanceAnnotationResponseInfo | None diff --git a/src/api/endpoints/annotate/relevance/get/query.py b/src/api/endpoints/annotate/relevance/get/query.py deleted file mode 100644 index 2c616b7b..00000000 --- a/src/api/endpoints/annotate/relevance/get/query.py +++ /dev/null @@ -1,64 +0,0 @@ -from sqlalchemy.ext.asyncio import AsyncSession - -from src.api.endpoints.annotate._shared.queries.get_annotation_batch_info import GetAnnotationBatchInfoQueryBuilder -from src.api.endpoints.annotate._shared.queries.get_next_url_for_user_annotation import \ - GetNextURLForUserAnnotationQueryBuilder -from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseInfo, \ - RelevanceAnnotationResponseInfo -from src.db.dto_converter import DTOConverter -from src.db.dtos.url.mapping 
import URLMapping -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion -from src.db.queries.base.builder import QueryBuilderBase - - -class GetNextUrlForRelevanceAnnotationQueryBuilder(QueryBuilderBase): - - def __init__( - self, - batch_id: int | None - ): - super().__init__() - self.batch_id = batch_id - - async def run( - self, - session: AsyncSession - ) -> GetNextRelevanceAnnotationResponseInfo | None: - url = await GetNextURLForUserAnnotationQueryBuilder( - user_suggestion_model_to_exclude=UserRelevantSuggestion, - auto_suggestion_relationship=URL.auto_relevant_suggestion, - batch_id=self.batch_id - ).run(session) - if url is None: - return None - - # Next, get all HTML content for the URL - html_response_info = DTOConverter.html_content_list_to_html_response_info( - url.html_content - ) - - if url.auto_relevant_suggestion is not None: - suggestion = url.auto_relevant_suggestion - else: - suggestion = None - - return GetNextRelevanceAnnotationResponseInfo( - url_info=URLMapping( - url=url.url, - url_id=url.id - ), - annotation=RelevanceAnnotationResponseInfo( - is_relevant=suggestion.relevant, - confidence=suggestion.confidence, - model_name=suggestion.model_name - ) if suggestion else None, - html_info=html_response_info, - batch_info=await GetAnnotationBatchInfoQueryBuilder( - batch_id=self.batch_id, - models=[ - UserUrlAgencySuggestion, - ] - ).run(session) - ) diff --git a/src/api/endpoints/annotate/relevance/post/dto.py b/src/api/endpoints/annotate/relevance/post/dto.py deleted file mode 100644 index a29a5327..00000000 --- a/src/api/endpoints/annotate/relevance/post/dto.py +++ /dev/null @@ -1,7 +0,0 @@ -from pydantic import BaseModel - -from src.core.enums import SuggestedStatus - - -class RelevanceAnnotationPostInfo(BaseModel): - suggested_status: SuggestedStatus \ No newline at end of 
file diff --git a/src/api/endpoints/annotate/routes.py b/src/api/endpoints/annotate/routes.py index 80c44cc8..682325e9 100644 --- a/src/api/endpoints/annotate/routes.py +++ b/src/api/endpoints/annotate/routes.py @@ -1,17 +1,11 @@ -from fastapi import APIRouter, Depends, Path, Query +from fastapi import APIRouter, Depends, Query from src.api.dependencies import get_async_core -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAnnotationResponse -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.api.endpoints.annotate.dtos.record_type.post import RecordTypeAnnotationPostInfo -from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseOuterInfo -from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseOuterInfo -from src.api.endpoints.annotate.relevance.post.dto import RelevanceAnnotationPostInfo from src.core.core import AsyncCore -from src.security.manager import get_access_info from src.security.dtos.access_info import AccessInfo +from src.security.manager import get_access_info annotate_router = APIRouter( prefix="/annotate", @@ -25,105 +19,7 @@ default=None ) -@annotate_router.get("/relevance") -async def get_next_url_for_relevance_annotation( - access_info: AccessInfo = Depends(get_access_info), - async_core: AsyncCore = Depends(get_async_core), - batch_id: int | None = Query( - description="The batch id of the next URL to get. 
" - "If not specified, defaults to first qualifying URL", - default=None), -) -> GetNextRelevanceAnnotationResponseOuterInfo: - return await async_core.get_next_url_for_relevance_annotation( - user_id=access_info.user_id, - batch_id=batch_id - ) - - -@annotate_router.post("/relevance/{url_id}") -async def annotate_url_for_relevance_and_get_next_url( - relevance_annotation_post_info: RelevanceAnnotationPostInfo, - url_id: int = Path(description="The URL id to annotate"), - async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), - batch_id: int | None = batch_query -) -> GetNextRelevanceAnnotationResponseOuterInfo: - """ - Post URL annotation and get next URL to annotate - """ - await async_core.submit_url_relevance_annotation( - user_id=access_info.user_id, - url_id=url_id, - suggested_status=relevance_annotation_post_info.suggested_status - ) - return await async_core.get_next_url_for_relevance_annotation( - user_id=access_info.user_id, - batch_id=batch_id - ) - -@annotate_router.get("/record-type") -async def get_next_url_for_record_type_annotation( - access_info: AccessInfo = Depends(get_access_info), - async_core: AsyncCore = Depends(get_async_core), - batch_id: int | None = batch_query -) -> GetNextRecordTypeAnnotationResponseOuterInfo: - return await async_core.get_next_url_for_record_type_annotation( - user_id=access_info.user_id, - batch_id=batch_id - ) -@annotate_router.post("/record-type/{url_id}") -async def annotate_url_for_record_type_and_get_next_url( - record_type_annotation_post_info: RecordTypeAnnotationPostInfo, - url_id: int = Path(description="The URL id to annotate"), - async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), - batch_id: int | None = batch_query -) -> GetNextRecordTypeAnnotationResponseOuterInfo: - """ - Post URL annotation and get next URL to annotate - """ - await async_core.submit_url_record_type_annotation( - 
user_id=access_info.user_id, - url_id=url_id, - record_type=record_type_annotation_post_info.record_type, - ) - return await async_core.get_next_url_for_record_type_annotation( - user_id=access_info.user_id, - batch_id=batch_id - ) - -@annotate_router.get("/agency") -async def get_next_url_for_agency_annotation( - access_info: AccessInfo = Depends(get_access_info), - async_core: AsyncCore = Depends(get_async_core), - batch_id: int | None = batch_query -) -> GetNextURLForAgencyAnnotationResponse: - return await async_core.get_next_url_agency_for_annotation( - user_id=access_info.user_id, - batch_id=batch_id - ) - -@annotate_router.post("/agency/{url_id}") -async def annotate_url_for_agency_and_get_next_url( - url_id: int, - agency_annotation_post_info: URLAgencyAnnotationPostInfo, - async_core: AsyncCore = Depends(get_async_core), - access_info: AccessInfo = Depends(get_access_info), - batch_id: int | None = batch_query -) -> GetNextURLForAgencyAnnotationResponse: - """ - Post URL annotation and get next URL to annotate - """ - await async_core.submit_url_agency_annotation( - user_id=access_info.user_id, - url_id=url_id, - agency_post_info=agency_annotation_post_info - ) - return await async_core.get_next_url_agency_for_annotation( - user_id=access_info.user_id, - batch_id=batch_id - ) @annotate_router.get("/all") async def get_next_url_for_all_annotations( diff --git a/src/api/endpoints/metrics/batches/aggregated/query/core.py b/src/api/endpoints/metrics/batches/aggregated/query/core.py index 2642f002..c17f0f6d 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/core.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/core.py @@ -17,7 +17,7 @@ from src.collectors.enums import URLStatus, CollectorType from src.core.enums import BatchStatus from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from 
src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL diff --git a/src/api/endpoints/metrics/batches/aggregated/query/rejected/query.py b/src/api/endpoints/metrics/batches/aggregated/query/rejected/query.py index 6c1d9e0f..7b94f2ba 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/rejected/query.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/rejected/query.py @@ -5,7 +5,7 @@ from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.queries.base.builder import QueryBuilderBase @@ -30,7 +30,7 @@ async def run( FlagURLValidated, FlagURLValidated.url_id == LinkBatchURL.url_id ) - .where(FlagURLValidated.type == URLValidatedType.NOT_RELEVANT) + .where(FlagURLValidated.type == URLType.NOT_RELEVANT) .group_by(Batch.strategy) ) diff --git a/src/api/endpoints/metrics/batches/breakdown/not_relevant/cte_.py b/src/api/endpoints/metrics/batches/breakdown/not_relevant/cte_.py index 14403e86..6342018b 100644 --- a/src/api/endpoints/metrics/batches/breakdown/not_relevant/cte_.py +++ b/src/api/endpoints/metrics/batches/breakdown/not_relevant/cte_.py @@ -2,7 +2,7 @@ from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import 
FlagURLValidated from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL @@ -20,7 +20,7 @@ FlagURLValidated.url_id == LinkBatchURL.url_id ) .where( - FlagURLValidated.type == URLValidatedType.NOT_RELEVANT + FlagURLValidated.type == URLType.NOT_RELEVANT ) .group_by(Batch.id) .cte("not_relevant") diff --git a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/rejected.py b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/rejected.py index 983554ab..56655c1b 100644 --- a/src/api/endpoints/metrics/urls/aggregated/query/subqueries/rejected.py +++ b/src/api/endpoints/metrics/urls/aggregated/query/subqueries/rejected.py @@ -1,6 +1,6 @@ from sqlalchemy import select, func -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL @@ -13,6 +13,6 @@ URL.id == FlagURLValidated.url_id, ) .where( - FlagURLValidated.type == URLValidatedType.NOT_RELEVANT, + FlagURLValidated.type == URLType.NOT_RELEVANT, ) ) \ No newline at end of file diff --git a/src/api/endpoints/metrics/urls/breakdown/query/core.py b/src/api/endpoints/metrics/urls/breakdown/query/core.py index 3fc52c3f..e585554c 100644 --- a/src/api/endpoints/metrics/urls/breakdown/query/core.py +++ b/src/api/endpoints/metrics/urls/breakdown/query/core.py @@ -10,7 +10,7 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.queries.base.builder import QueryBuilderBase @@ -24,7 +24,7 @@ async def run(self, session: AsyncSession) -> 
GetMetricsURLsBreakdownPendingResp case((UserRecordTypeSuggestion.url_id != None, literal(True)), else_=literal(False)).label( "has_user_record_type_annotation" ), - case((UserRelevantSuggestion.url_id != None, literal(True)), else_=literal(False)).label( + case((UserURLTypeSuggestion.url_id != None, literal(True)), else_=literal(False)).label( "has_user_relevant_annotation" ), case((UserUrlAgencySuggestion.url_id != None, literal(True)), else_=literal(False)).label( @@ -32,7 +32,7 @@ async def run(self, session: AsyncSession) -> GetMetricsURLsBreakdownPendingResp ), ) .outerjoin(UserRecordTypeSuggestion, URL.id == UserRecordTypeSuggestion.url_id) - .outerjoin(UserRelevantSuggestion, URL.id == UserRelevantSuggestion.url_id) + .outerjoin(UserURLTypeSuggestion, URL.id == UserURLTypeSuggestion.url_id) .outerjoin(UserUrlAgencySuggestion, URL.id == UserUrlAgencySuggestion.url_id) ).cte("flags") diff --git a/src/api/endpoints/review/approve/query_/core.py b/src/api/endpoints/review/approve/query_/core.py index 86c0212c..48f0ecae 100644 --- a/src/api/endpoints/review/approve/query_/core.py +++ b/src/api/endpoints/review/approve/query_/core.py @@ -9,7 +9,7 @@ from src.collectors.enums import URLStatus from src.db.constants import PLACEHOLDER_AGENCY_NAME from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -167,6 +167,6 @@ async def _add_validated_flag( ) -> None: flag = FlagURLValidated( url_id=url.id, - type=URLValidatedType.DATA_SOURCE + type=URLType.DATA_SOURCE ) session.add(flag) diff --git a/src/api/endpoints/review/next/convert.py b/src/api/endpoints/review/next/convert.py index ca087895..2789895f 100644 --- 
a/src/api/endpoints/review/next/convert.py +++ b/src/api/endpoints/review/next/convert.py @@ -1,4 +1,6 @@ -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo +from collections import Counter + +from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo, AgencySuggestionAndUserCount from src.api.endpoints.review.next.dto import FinalReviewAnnotationAgencyInfo, FinalReviewAnnotationAgencyAutoInfo from src.core.enums import SuggestionType from src.db.models.impl.agency.sqlalchemy import Agency @@ -11,7 +13,7 @@ def convert_agency_info_to_final_review_annotation_agency_info( subtasks: list[URLAutoAgencyIDSubtask], confirmed_agencies: list[LinkURLAgency], - user_agency_suggestion: UserUrlAgencySuggestion + user_agency_suggestions: list[UserUrlAgencySuggestion] ) -> FinalReviewAnnotationAgencyInfo: confirmed_agency_info: list[GetNextURLForAgencyAgencyInfo] = ( @@ -26,15 +28,15 @@ def convert_agency_info_to_final_review_annotation_agency_info( ) ) - agency_user_info: GetNextURLForAgencyAgencyInfo | None = ( + agency_user_suggestions: list[AgencySuggestionAndUserCount] = ( _convert_user_url_agency_suggestion_to_final_review_annotation_agency_user_info( - user_agency_suggestion + user_agency_suggestions ) ) return FinalReviewAnnotationAgencyInfo( confirmed=confirmed_agency_info, - user=agency_user_info, + user=agency_user_suggestions, auto=agency_auto_info ) @@ -52,19 +54,29 @@ def _convert_confirmed_agencies_to_final_review_annotation_agency_info( return results def _convert_user_url_agency_suggestion_to_final_review_annotation_agency_user_info( - user_url_agency_suggestion: UserUrlAgencySuggestion -) -> GetNextURLForAgencyAgencyInfo | None: - suggestion = user_url_agency_suggestion - if suggestion is None: - return None - if suggestion.is_new: - return GetNextURLForAgencyAgencyInfo( - suggestion_type=SuggestionType.NEW_AGENCY, + user_url_agency_suggestions: list[UserUrlAgencySuggestion] +) -> 
list[AgencySuggestionAndUserCount]: + agency_id_count: Counter[int] = Counter() + agency_id_to_agency: dict[int, GetNextURLForAgencyAgencyInfo] = {} + for suggestion in user_url_agency_suggestions: + agency_id_count[suggestion.agency_id] += 1 + agency_id_to_agency[suggestion.agency_id] = _convert_agency_to_get_next_url_for_agency_agency_info( + suggestion_type=SuggestionType.USER_SUGGESTION, + agency=suggestion.agency ) - return _convert_agency_to_get_next_url_for_agency_agency_info( - suggestion_type=SuggestionType.USER_SUGGESTION, - agency=suggestion.agency - ) + + suggestions_and_counts: list[AgencySuggestionAndUserCount] = [] + for agency_id, count in agency_id_count.items(): + suggestions_and_counts.append( + AgencySuggestionAndUserCount( + suggestion=agency_id_to_agency[agency_id], + user_count=count + ) + ) + + suggestions_and_counts.sort(key=lambda x: x.user_count, reverse=True) + + return suggestions_and_counts def _convert_agency_to_get_next_url_for_agency_agency_info( suggestion_type: SuggestionType, diff --git a/src/api/endpoints/review/next/core.py b/src/api/endpoints/review/next/core.py index 1736a970..d19d4926 100644 --- a/src/api/endpoints/review/next/core.py +++ b/src/api/endpoints/review/next/core.py @@ -38,13 +38,13 @@ def __init__(self, batch_id: int | None = None): URL.html_content, URL.auto_record_type_suggestion, URL.auto_relevant_suggestion, - URL.user_relevant_suggestion, - URL.user_record_type_suggestion, + URL.user_relevant_suggestions, + URL.user_record_type_suggestions, URL.optional_data_source_metadata, ] # The below relationships are joined to entities that are joined to the URL self.double_join_relationships = [ - (URL.user_agency_suggestion, UserUrlAgencySuggestion.agency), + (URL.user_agency_suggestions, UserUrlAgencySuggestion.agency), (URL.confirmed_agencies, LinkURLAgency.agency) ] @@ -191,16 +191,16 @@ async def run( description=result.description, annotations=FinalReviewAnnotationInfo( 
relevant=DTOConverter.final_review_annotation_relevant_info( - user_suggestion=result.user_relevant_suggestion, + user_suggestions=result.user_relevant_suggestions, auto_suggestion=result.auto_relevant_suggestion ), record_type=DTOConverter.final_review_annotation_record_type_info( - user_suggestion=result.user_record_type_suggestion, + user_suggestions=result.user_record_type_suggestions, auto_suggestion=result.auto_record_type_suggestion ), agency=convert_agency_info_to_final_review_annotation_agency_info( subtasks=result.auto_agency_subtasks, - user_agency_suggestion=result.user_agency_suggestion, + user_agency_suggestions=result.user_agency_suggestions, confirmed_agencies=result.confirmed_agencies ) ), diff --git a/src/api/endpoints/review/next/dto.py b/src/api/endpoints/review/next/dto.py index e1fa2f74..13a68239 100644 --- a/src/api/endpoints/review/next/dto.py +++ b/src/api/endpoints/review/next/dto.py @@ -1,25 +1,24 @@ -from typing import Optional - from pydantic import BaseModel, Field -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo +from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo, AgencySuggestionAndUserCount from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo -from src.core.enums import RecordType, SuggestedStatus +from src.core.enums import RecordType from src.core.tasks.url.operators.html.scraper.parser.dtos.response_html import ResponseHTMLInfo +from src.db.models.impl.flag.url_validated.enums import URLType class FinalReviewAnnotationRelevantInfo(BaseModel): auto: RelevanceAnnotationResponseInfo | None = Field(title="Whether the auto-labeler has marked the URL as relevant") - user: SuggestedStatus | None = Field( - title="The status marked by a user, if any", + user: dict[URLType, int] = Field( + title="How users have labeled the URLType" ) class FinalReviewAnnotationRecordTypeInfo(BaseModel): auto: RecordType | None = Field( title="The 
record type suggested by the auto-labeler" ) - user: RecordType | None = Field( - title="The record type suggested by a user", + user: dict[RecordType, int] = Field( + title="The record types suggested by other users", ) # region Agency @@ -36,8 +35,8 @@ class FinalReviewAnnotationAgencyInfo(BaseModel): ) auto: FinalReviewAnnotationAgencyAutoInfo | None = Field( title="A single agency or a list of agencies suggested by the auto-labeler",) - user: GetNextURLForAgencyAgencyInfo | None = Field( - title="A single agency suggested by a user", + user: list[AgencySuggestionAndUserCount] = Field( + title="Agencies suggested by users", ) # endregion diff --git a/src/api/endpoints/review/reject/query.py b/src/api/endpoints/review/reject/query.py index c187a2a8..89509dfc 100644 --- a/src/api/endpoints/review/reject/query.py +++ b/src/api/endpoints/review/reject/query.py @@ -5,7 +5,7 @@ from src.api.endpoints.review.enums import RejectionReason from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.reviewing_user import ReviewingUserURL @@ -35,14 +35,14 @@ async def run(self, session) -> None: url = await session.execute(query) url = url.scalars().first() - validation_type: URLValidatedType | None = None + validation_type: URLType | None = None match self.rejection_reason: case RejectionReason.INDIVIDUAL_RECORD: - validation_type = URLValidatedType.INDIVIDUAL_RECORD + validation_type = URLType.INDIVIDUAL_RECORD case RejectionReason.BROKEN_PAGE_404: url.status = URLStatus.NOT_FOUND.value case RejectionReason.NOT_RELEVANT: - validation_type = URLValidatedType.NOT_RELEVANT + validation_type = URLType.NOT_RELEVANT case _: raise HTTPException( status_code=HTTP_400_BAD_REQUEST, diff --git 
a/src/core/core.py b/src/core/core.py index 4051b8f2..cd2b9be2 100644 --- a/src/core/core.py +++ b/src/core/core.py @@ -5,13 +5,10 @@ from pydantic import BaseModel from sqlalchemy.exc import IntegrityError -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAnnotationResponse from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo from src.api.endpoints.annotate.all.post.query import AddAllAnnotationsToURLQueryBuilder -from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseOuterInfo -from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseOuterInfo from src.api.endpoints.batch.dtos.get.logs import GetBatchLogsResponse from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary @@ -33,18 +30,17 @@ from src.api.endpoints.review.next.dto import GetNextURLForFinalReviewOuterResponse from src.api.endpoints.search.dtos.response import SearchURLResponse from src.api.endpoints.task.by_id.dto import TaskInfo +from src.api.endpoints.task.dtos.get.task_status import GetTaskStatusResponseInfo from src.api.endpoints.task.dtos.get.tasks import GetTasksResponse from src.api.endpoints.url.get.dto import GetURLsResponseInfo -from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.batch.pydantic.info import BatchInfo -from src.api.endpoints.task.dtos.get.task_status import GetTaskStatusResponseInfo -from src.db.enums import TaskType -from src.collectors.manager import AsyncCollectorManager from src.collectors.enums import CollectorType -from src.core.tasks.url.manager import TaskManager +from src.collectors.manager import AsyncCollectorManager +from 
src.core.enums import BatchStatus, RecordType, AnnotationType from src.core.error_manager.core import ErrorManager -from src.core.enums import BatchStatus, RecordType, AnnotationType, SuggestedStatus - +from src.core.tasks.url.manager import TaskManager +from src.db.client.async_ import AsyncDatabaseClient +from src.db.enums import TaskType +from src.db.models.impl.batch.pydantic.info import BatchInfo from src.security.dtos.access_info import AccessInfo @@ -169,98 +165,6 @@ async def get_task_info(self, task_id: int) -> TaskInfo: #region Annotations and Review - async def submit_url_relevance_annotation( - self, - user_id: int, - url_id: int, - suggested_status: SuggestedStatus - ): - try: - return await self.adb_client.add_user_relevant_suggestion( - user_id=user_id, - url_id=url_id, - suggested_status=suggested_status - ) - except IntegrityError: - return await ErrorManager.raise_annotation_exists_error( - annotation_type=AnnotationType.RELEVANCE, - url_id=url_id - ) - - async def get_next_url_for_relevance_annotation( - self, - user_id: int, - batch_id: Optional[int] - ) -> GetNextRelevanceAnnotationResponseOuterInfo: - next_annotation = await self.adb_client.get_next_url_for_relevance_annotation( - user_id=user_id, - batch_id=batch_id - ) - return GetNextRelevanceAnnotationResponseOuterInfo( - next_annotation=next_annotation - ) - - async def get_next_url_for_record_type_annotation( - self, - user_id: int, - batch_id: Optional[int] - ) -> GetNextRecordTypeAnnotationResponseOuterInfo: - next_annotation = await self.adb_client.get_next_url_for_record_type_annotation( - user_id=user_id, - batch_id=batch_id - ) - return GetNextRecordTypeAnnotationResponseOuterInfo( - next_annotation=next_annotation - ) - - async def submit_url_record_type_annotation( - self, - user_id: int, - url_id: int, - record_type: RecordType, - ): - try: - return await self.adb_client.add_user_record_type_suggestion( - user_id=user_id, - url_id=url_id, - record_type=record_type - ) - except 
IntegrityError: - return await ErrorManager.raise_annotation_exists_error( - annotation_type=AnnotationType.RECORD_TYPE, - url_id=url_id - ) - - - async def get_next_url_agency_for_annotation( - self, - user_id: int, - batch_id: Optional[int] - ) -> GetNextURLForAgencyAnnotationResponse: - return await self.adb_client.get_next_url_agency_for_annotation( - user_id=user_id, - batch_id=batch_id - ) - - async def submit_url_agency_annotation( - self, - user_id: int, - url_id: int, - agency_post_info: URLAgencyAnnotationPostInfo - ) -> GetNextURLForAgencyAnnotationResponse: - if not agency_post_info.is_new and not agency_post_info.suggested_agency: - raise ValueError("suggested_agency must be provided if is_new is False") - - if agency_post_info.is_new: - agency_suggestion_id = None - else: - agency_suggestion_id = agency_post_info.suggested_agency - return await self.adb_client.add_agency_manual_suggestion( - user_id=user_id, - url_id=url_id, - agency_id=agency_suggestion_id, - is_new=agency_post_info.is_new, - ) async def get_next_source_for_review( self, diff --git a/src/core/enums.py b/src/core/enums.py index 4fa903c1..4d11c7af 100644 --- a/src/core/enums.py +++ b/src/core/enums.py @@ -83,13 +83,3 @@ class SubmitResponseStatus(Enum): SUCCESS = "success" FAILURE = "FAILURE" ALREADY_EXISTS = "already_exists" - -# TODO (SM422): Replace use of SuggestedStatus with URLValidationType -class SuggestedStatus(Enum): - """ - Possible values for user_relevant_suggestions:suggested_status - """ - RELEVANT = "relevant" - NOT_RELEVANT = "not relevant" - INDIVIDUAL_RECORD = "individual record" - BROKEN_PAGE_404 = "broken page/404 not found" \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py index 5ad96115..41926fe4 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/convert.py 
@@ -1,7 +1,7 @@ from src.core.enums import RecordType from src.core.tasks.scheduled.impl.huggingface.queries.get.enums import RecordTypeCoarse from src.core.tasks.scheduled.impl.huggingface.queries.get.mappings import FINE_COARSE_RECORD_TYPE_MAPPING -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType def convert_fine_to_coarse_record_type( @@ -11,12 +11,12 @@ def convert_fine_to_coarse_record_type( def convert_validated_type_to_relevant( - validated_type: URLValidatedType + validated_type: URLType ) -> bool: match validated_type: - case URLValidatedType.NOT_RELEVANT: + case URLType.NOT_RELEVANT: return False - case URLValidatedType.DATA_SOURCE: + case URLType.DATA_SOURCE: return True case _: raise ValueError(f"Disallowed validated type: {validated_type}") \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py b/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py index d58cbdf7..886bd65d 100644 --- a/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py +++ b/src/core/tasks/scheduled/impl/huggingface/queries/get/core.py @@ -6,7 +6,7 @@ from src.core.tasks.scheduled.impl.huggingface.queries.get.model import GetForLoadingToHuggingFaceOutput from src.db.client.helpers import add_standard_limit_and_offset from src.db.helpers.session import session_helper as sh -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML @@ -47,8 +47,8 @@ async def run(self, session: AsyncSession) -> list[GetForLoadingToHuggingFaceOut ) .where( FlagURLValidated.type.in_( - (URLValidatedType.DATA_SOURCE, - URLValidatedType.NOT_RELEVANT) + 
(URLType.DATA_SOURCE, + URLType.NOT_RELEVANT) ) ) ) @@ -63,7 +63,7 @@ async def run(self, session: AsyncSession) -> list[GetForLoadingToHuggingFaceOut url_id=result[label_url_id], url=result[label_url], relevant=convert_validated_type_to_relevant( - URLValidatedType(result[label_type]) + URLType(result[label_type]) ), record_type_fine=result[label_record_type_fine], record_type_coarse=convert_fine_to_coarse_record_type( diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/links.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/links.py index 9336deaa..9a083719 100644 --- a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/links.py +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/links/lookup_/links.py @@ -4,7 +4,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.db.helpers.session import session_helper as sh -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency @@ -34,7 +34,7 @@ async def run(self, session: AsyncSession) -> list[LinkURLAgencyPydantic]: FlagURLValidated.url_id == URL.id, ) .where( - FlagURLValidated.type == URLValidatedType.META_URL, + FlagURLValidated.type == URLType.META_URL, LinkURLAgency.agency_id.in_(self.agency_ids), ) ) diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/core.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/core.py index 73761251..f1bf793d 100644 --- a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/core.py +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/add/core.py @@ -2,7 +2,7 @@ from src.core.enums import 
RecordType from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.insert import URLInsertModel @@ -49,7 +49,7 @@ async def run(self, session: AsyncSession) -> list[URLMapping]: flag_inserts.append( FlagURLValidatedPydantic( url_id=url_id, - type=URLValidatedType.META_URL + type=URLType.META_URL ) ) await sh.bulk_insert(session, models=flag_inserts) diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/response.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/response.py index ff2d668d..da33244e 100644 --- a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/response.py +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/lookup/response.py @@ -1,14 +1,14 @@ from pydantic import BaseModel from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType class MetaURLLookupResponse(BaseModel): url: str url_id: int | None record_type: RecordType | None - validation_type: URLValidatedType | None + validation_type: URLType | None @property def exists_in_db(self) -> bool: diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/filter.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/filter.py index b0c32a7e..74cae709 100644 --- a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/filter.py +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/filter.py @@ -1,6 +1,6 @@ from src.core.enums import RecordType from 
src.core.tasks.scheduled.impl.sync.agency.queries.upsert.meta_urls.update.params import UpdateMetaURLsParams -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType def filter_urls_with_non_meta_record_type( @@ -31,7 +31,7 @@ def filter_urls_with_non_meta_url_validation_flag( for param in params: if param.validation_type is None: continue - if param.validation_type != URLValidatedType.META_URL: + if param.validation_type != URLType.META_URL: url_ids.append(param.url_id) return url_ids \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/params.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/params.py index cb74a378..c25f3bf1 100644 --- a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/params.py +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/params.py @@ -1,11 +1,11 @@ from pydantic import BaseModel from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType class UpdateMetaURLsParams(BaseModel): - validation_type: URLValidatedType | None + validation_type: URLType | None url_id: int record_type: RecordType | None diff --git a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/requester.py b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/requester.py index 175b1bbf..94cdc401 100644 --- a/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/requester.py +++ b/src/core/tasks/scheduled/impl/sync/agency/queries/upsert/meta_urls/update/requester.py @@ -1,7 +1,7 @@ from sqlalchemy import update from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums 
import URLType from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL @@ -21,7 +21,7 @@ async def update_validation_flags(self, url_ids: list[int]) -> None: FlagURLValidated.url_id.in_(url_ids) ) .values( - type=URLValidatedType.META_URL + type=URLType.META_URL ) ) await self.session.execute(query) @@ -31,7 +31,7 @@ async def add_validation_flags(self, url_ids: list[int]) -> None: for url_id in url_ids: flag = FlagURLValidatedPydantic( url_id=url_id, - type=URLValidatedType.META_URL, + type=URLType.META_URL, ) inserts.append(flag) diff --git a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py index 93c1cbc9..a000783b 100644 --- a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/agency/core.py @@ -5,7 +5,7 @@ from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.convert import convert_to_link_url_agency_models from src.db.helpers.session import session_helper as sh -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.agency.params import UpdateLinkURLAgencyParams @@ -45,7 +45,7 @@ async def _get_existing_links(self, session: AsyncSession) -> None: LinkURLAgency.url_id.in_( self.existing_url_ids ), - FlagURLValidated.type != URLValidatedType.META_URL + FlagURLValidated.type != URLType.META_URL ) ) links = await session.scalars(query) diff --git 
a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/convert.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/convert.py index e2def8c2..ed5ff8ac 100644 --- a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/convert.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/convert.py @@ -1,6 +1,6 @@ from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.lookup.response import URLDataSyncInfo from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.external.pdap.enums import ApprovalStatus @@ -14,11 +14,11 @@ def convert_url_sync_info_to_url_mappings( def convert_approval_status_to_validated_type( approval_status: ApprovalStatus -) -> URLValidatedType: +) -> URLType: match approval_status: case ApprovalStatus.APPROVED: - return URLValidatedType.DATA_SOURCE + return URLType.DATA_SOURCE case ApprovalStatus.REJECTED: - return URLValidatedType.NOT_RELEVANT + return URLType.NOT_RELEVANT case _: raise ValueError(f"Invalid approval status: {approval_status}") \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py index e0a7225f..dd45f727 100644 --- a/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py +++ b/src/core/tasks/scheduled/impl/sync/data_sources/queries/upsert/param_manager.py @@ -12,7 +12,7 @@ from src.core.tasks.scheduled.impl.sync.data_sources.queries.upsert.url.update.params import \ UpdateURLForDataSourcesSyncParams from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.pydantic import 
FlagURLValidatedPydantic from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.pydantic import LinkURLAgencyPydantic @@ -116,7 +116,7 @@ def upsert_validated_flags( url_id: int = mapper.get_id(url) sync_info: DataSourcesSyncResponseInnerInfo = self._mapper.get(url) approval_status: ApprovalStatus = sync_info.approval_status - validated_type: URLValidatedType = convert_approval_status_to_validated_type(approval_status) + validated_type: URLType = convert_approval_status_to_validated_type(approval_status) flag = FlagURLValidatedPydantic( url_id=url_id, type=validated_type diff --git a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/whitelisted_root_urls.py b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/whitelisted_root_urls.py index 1af8f46c..272717b5 100644 --- a/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/whitelisted_root_urls.py +++ b/src/core/tasks/url/operators/agency_identification/subtasks/impl/homepage_match_/queries/ctes/whitelisted_root_urls.py @@ -1,7 +1,7 @@ from sqlalchemy import CTE, select, func from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL @@ -32,7 +32,7 @@ ) .where( # The connected URLs must be Meta URLs - FlagURLValidated.type == URLValidatedType.META_URL, + FlagURLValidated.type == URLType.META_URL, # Root URL can't be "https://catalog.data.gov" URL.url != "https://catalog.data.gov" ) diff --git a/src/core/tasks/url/operators/submit_approved/queries/cte.py 
b/src/core/tasks/url/operators/submit_approved/queries/cte.py index ccd55c8d..1ef5617f 100644 --- a/src/core/tasks/url/operators/submit_approved/queries/cte.py +++ b/src/core/tasks/url/operators/submit_approved/queries/cte.py @@ -2,7 +2,7 @@ from sqlalchemy.orm import aliased from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource @@ -15,7 +15,7 @@ ) .where( URL.status == URLStatus.OK, - FlagURLValidated.type == URLValidatedType.DATA_SOURCE, + FlagURLValidated.type == URLType.DATA_SOURCE, ~exists().where( URLDataSource.url_id == URL.id ) diff --git a/src/api/endpoints/annotate/all/get/queries/previously_annotated/__init__.py b/src/core/tasks/url/operators/validate/__init__.py similarity index 100% rename from src/api/endpoints/annotate/all/get/queries/previously_annotated/__init__.py rename to src/core/tasks/url/operators/validate/__init__.py diff --git a/src/core/tasks/url/operators/validate/core.py b/src/core/tasks/url/operators/validate/core.py new file mode 100644 index 00000000..23ca00c1 --- /dev/null +++ b/src/core/tasks/url/operators/validate/core.py @@ -0,0 +1,23 @@ +from src.core.tasks.url.operators.base import URLTaskOperatorBase +from src.db.enums import TaskType + + +class AutoValidateURLTaskOperator(URLTaskOperatorBase): + + @property + def task_type(self) -> TaskType: + return TaskType.AUTO_VALIDATE + + async def meets_task_prerequisites(self) -> bool: + raise NotImplementedError + + async def inner_task_logic(self) -> None: + # TODO (SM422): Implement + + # Get URLs for auto validation + + # Link + + # Add Validation Objects (Flag and ValidationType) + + raise NotImplementedError \ No newline at end of file diff --git 
a/src/api/endpoints/annotate/dtos/record_type/__init__.py b/src/core/tasks/url/operators/validate/queries/__init__.py similarity index 100% rename from src/api/endpoints/annotate/dtos/record_type/__init__.py rename to src/core/tasks/url/operators/validate/queries/__init__.py diff --git a/src/core/tasks/url/operators/validate/queries/cte.py b/src/core/tasks/url/operators/validate/queries/cte.py new file mode 100644 index 00000000..3421977b --- /dev/null +++ b/src/core/tasks/url/operators/validate/queries/cte.py @@ -0,0 +1,8 @@ + + +class AutoValidatedTaskOperatorPrerequisitesCTEContainer: + + def __init__(self): + self._query = ( + # TODO: Complete + ) \ No newline at end of file diff --git a/src/api/endpoints/annotate/relevance/post/__init__.py b/src/core/tasks/url/operators/validate/queries/get/__init__.py similarity index 100% rename from src/api/endpoints/annotate/relevance/post/__init__.py rename to src/core/tasks/url/operators/validate/queries/get/__init__.py diff --git a/src/core/tasks/url/operators/validate/queries/get/core.py b/src/core/tasks/url/operators/validate/queries/get/core.py new file mode 100644 index 00000000..aad27236 --- /dev/null +++ b/src/core/tasks/url/operators/validate/queries/get/core.py @@ -0,0 +1,20 @@ +from typing import Any + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase + + +class GetURLsForAutoValidationQueryBuilder(QueryBuilderBase): + + + async def run(self, session: AsyncSession) -> Any: + # TODO (SM422): Implement + + query = ( + select( + URL.id + ) + ) \ No newline at end of file diff --git a/tests/automated/integration/api/annotate/agency/__init__.py b/src/core/tasks/url/operators/validate/queries/prereq/__init__.py similarity index 100% rename from tests/automated/integration/api/annotate/agency/__init__.py rename to 
src/core/tasks/url/operators/validate/queries/prereq/__init__.py diff --git a/src/db/client/async_.py b/src/db/client/async_.py index fc5e013f..2e186f7c 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -8,16 +8,9 @@ from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker from sqlalchemy.orm import selectinload, QueryableAttribute -from src.api.endpoints.annotate._shared.queries.get_annotation_batch_info import GetAnnotationBatchInfoQueryBuilder -from src.api.endpoints.annotate._shared.queries.get_next_url_for_user_annotation import \ - GetNextURLForUserAnnotationQueryBuilder -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAnnotationResponse -from src.api.endpoints.annotate.agency.get.queries.next_for_annotation import GetNextURLAgencyForAnnotationQueryBuilder + from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.all.get.queries.core import GetNextURLForAllAnnotationQueryBuilder -from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseInfo -from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseInfo -from src.api.endpoints.annotate.relevance.get.query import GetNextUrlForRelevanceAnnotationQueryBuilder from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary from src.api.endpoints.batch.duplicates.query import GetDuplicatesByBatchIDQueryBuilder @@ -51,7 +44,7 @@ from src.api.endpoints.url.get.query import GetURLsQueryBuilder from src.collectors.enums import URLStatus, CollectorType from src.collectors.queries.insert.urls.query import InsertURLsQueryBuilder -from src.core.enums import BatchStatus, RecordType, SuggestedStatus +from src.core.enums import BatchStatus, RecordType from src.core.env_var_manager import EnvVarManager 
from src.core.tasks.scheduled.impl.huggingface.queries.state import SetHuggingFaceUploadStateQueryBuilder from src.core.tasks.scheduled.impl.sync.agency.dtos.parameters import AgencySyncParameters @@ -100,6 +93,7 @@ from src.db.models.impl.batch.pydantic.info import BatchInfo from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.duplicate.pydantic.info import DuplicateInfo +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.task_url import LinkTaskURL from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency @@ -123,7 +117,7 @@ from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.auto.pydantic.input import AutoRelevancyAnnotationInput from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.models.templates_.base import Base from src.db.queries.base.builder import QueryBuilderBase @@ -300,22 +294,6 @@ async def get_user_suggestion( result = await session.execute(statement) return result.unique().scalar_one_or_none() - async def get_next_url_for_user_annotation( - self, - user_suggestion_model_to_exclude: UserSuggestionModel, - auto_suggestion_relationship: QueryableAttribute, - batch_id: int | None, - check_if_annotated_not_relevant: bool = False - ) -> URL: - return await self.run_query_builder( - builder=GetNextURLForUserAnnotationQueryBuilder( - user_suggestion_model_to_exclude=user_suggestion_model_to_exclude, - auto_suggestion_relationship=auto_suggestion_relationship, - batch_id=batch_id, - 
check_if_annotated_not_relevant=check_if_annotated_not_relevant - ) - ) - async def get_tdos_for_auto_relevancy(self) -> list[URLRelevantTDO]: return await self.run_query_builder(builder=GetAutoRelevantTDOsQueryBuilder()) @@ -325,78 +303,29 @@ async def add_user_relevant_suggestion( session: AsyncSession, url_id: int, user_id: int, - suggested_status: SuggestedStatus + suggested_status: URLType ): prior_suggestion = await self.get_user_suggestion( session, - model=UserRelevantSuggestion, + model=UserURLTypeSuggestion, user_id=user_id, url_id=url_id ) if prior_suggestion is not None: - prior_suggestion.suggested_status = suggested_status.value + prior_suggestion.type = suggested_status.value return - suggestion = UserRelevantSuggestion( + suggestion = UserURLTypeSuggestion( url_id=url_id, user_id=user_id, - suggested_status=suggested_status.value + type=suggested_status.value ) session.add(suggestion) - async def get_next_url_for_relevance_annotation( - self, - batch_id: int | None, - user_id: int | None = None, - ) -> GetNextRelevanceAnnotationResponseInfo | None: - return await self.run_query_builder(GetNextUrlForRelevanceAnnotationQueryBuilder(batch_id)) - # endregion relevant # region record_type - @session_manager - async def get_next_url_for_record_type_annotation( - self, - session: AsyncSession, - user_id: int, - batch_id: int | None - ) -> GetNextRecordTypeAnnotationResponseInfo | None: - - url = await GetNextURLForUserAnnotationQueryBuilder( - user_suggestion_model_to_exclude=UserRecordTypeSuggestion, - auto_suggestion_relationship=URL.auto_record_type_suggestion, - batch_id=batch_id, - check_if_annotated_not_relevant=True - ).run(session) - if url is None: - return None - - # Next, get all HTML content for the URL - html_response_info = DTOConverter.html_content_list_to_html_response_info( - url.html_content - ) - - if url.auto_record_type_suggestion is not None: - suggestion = url.auto_record_type_suggestion.record_type - else: - suggestion = None - - 
return GetNextRecordTypeAnnotationResponseInfo( - url_info=URLMapping( - url=url.url, - url_id=url.id - ), - suggested_record_type=suggestion, - html_info=html_response_info, - batch_info=await GetAnnotationBatchInfoQueryBuilder( - batch_id=batch_id, - models=[ - UserUrlAgencySuggestion, - ] - ).run(session) - ) - @session_manager async def add_auto_record_type_suggestions( self, @@ -718,20 +647,6 @@ async def get_tasks( tasks=final_results ) - - - async def get_next_url_agency_for_annotation( - self, - user_id: int, - batch_id: int | None - ) -> GetNextURLForAgencyAnnotationResponse: - return await self.run_query_builder( - builder=GetNextURLAgencyForAnnotationQueryBuilder( - user_id=user_id, - batch_id=batch_id - ) - ) - @session_manager async def upsert_new_agencies( self, diff --git a/src/db/client/types.py b/src/db/client/types.py index 02c0e39b..ffce5621 100644 --- a/src/db/client/types.py +++ b/src/db/client/types.py @@ -1,5 +1,5 @@ from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion -UserSuggestionModel = UserRelevantSuggestion or UserRecordTypeSuggestion or UserUrlAgencySuggestion +UserSuggestionModel = UserURLTypeSuggestion or UserRecordTypeSuggestion or UserUrlAgencySuggestion diff --git a/src/db/constants.py b/src/db/constants.py index f2cdefb1..a3574a96 100644 --- a/src/db/constants.py +++ b/src/db/constants.py @@ -1,13 +1,13 @@ from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion 
PLACEHOLDER_AGENCY_NAME = "PLACEHOLDER_AGENCY_NAME" STANDARD_ROW_LIMIT = 100 USER_ANNOTATION_MODELS = [ - UserRelevantSuggestion, + UserURLTypeSuggestion, UserRecordTypeSuggestion, UserUrlAgencySuggestion ] \ No newline at end of file diff --git a/src/db/dto_converter.py b/src/db/dto_converter.py index b19b834d..f0c9b097 100644 --- a/src/db/dto_converter.py +++ b/src/db/dto_converter.py @@ -1,3 +1,5 @@ +from collections import Counter + from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo from src.api.endpoints.review.next.dto import FinalReviewAnnotationRelevantInfo, FinalReviewAnnotationRecordTypeInfo, \ @@ -15,7 +17,7 @@ from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion class DTOConverter: @@ -26,7 +28,7 @@ class DTOConverter: @staticmethod def final_review_annotation_relevant_info( - user_suggestion: UserRelevantSuggestion, + user_suggestions: list[UserURLTypeSuggestion], auto_suggestion: AutoRelevantSuggestion ) -> FinalReviewAnnotationRelevantInfo: @@ -36,15 +38,17 @@ def final_review_annotation_relevant_info( model_name=auto_suggestion.model_name ) if auto_suggestion else None - user_value = user_suggestion.suggested_status if user_suggestion else None + + user_types = [suggestion.type for suggestion in user_suggestions] + counter = Counter(user_types) return FinalReviewAnnotationRelevantInfo( auto=auto_value, - user=user_value + user=dict(counter) ) @staticmethod def final_review_annotation_record_type_info( - user_suggestion: UserRecordTypeSuggestion, + 
user_suggestions: list[UserRecordTypeSuggestion], auto_suggestion: AutoRecordTypeSuggestion ): @@ -52,10 +56,10 @@ def final_review_annotation_record_type_info( auto_value = None else: auto_value = RecordType(auto_suggestion.record_type) - if user_suggestion is None: - user_value = None - else: - user_value = RecordType(user_suggestion.record_type) + + record_types: list[RecordType] = [suggestion.record_type for suggestion in user_suggestions] + counter = Counter(record_types) + user_value = dict(counter) return FinalReviewAnnotationRecordTypeInfo( auto=auto_value, diff --git a/src/db/enums.py b/src/db/enums.py index 62cf6ec0..84d2c199 100644 --- a/src/db/enums.py +++ b/src/db/enums.py @@ -49,6 +49,7 @@ class TaskType(PyEnum): IA_SAVE = "Internet Archives Archive" SCREENSHOT = "Screenshot" LOCATION_ID = "Location ID" + AUTO_VALIDATE = "Auto Validate" # Scheduled Tasks PUSH_TO_HUGGINGFACE = "Push to Hugging Face" diff --git a/tests/automated/integration/api/annotate/record_type/__init__.py b/src/db/models/impl/flag/auto_validated/__init__.py similarity index 100% rename from tests/automated/integration/api/annotate/record_type/__init__.py rename to src/db/models/impl/flag/auto_validated/__init__.py diff --git a/src/db/models/impl/flag/auto_validated/pydantic.py b/src/db/models/impl/flag/auto_validated/pydantic.py new file mode 100644 index 00000000..da1efb7b --- /dev/null +++ b/src/db/models/impl/flag/auto_validated/pydantic.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel + +from src.db.models.impl.flag.auto_validated.sqlalchemy import FlagURLAutoValidated + + +class FlagURLAutoValidatedPydantic(BaseModel): + + url_id: int + + @classmethod + def sa_model(cls) -> type[FlagURLAutoValidated]: + return FlagURLAutoValidated \ No newline at end of file diff --git a/src/db/models/impl/flag/auto_validated/sqlalchemy.py b/src/db/models/impl/flag/auto_validated/sqlalchemy.py new file mode 100644 index 00000000..a0ce02b9 --- /dev/null +++ 
b/src/db/models/impl/flag/auto_validated/sqlalchemy.py @@ -0,0 +1,18 @@ +from sqlalchemy import PrimaryKeyConstraint + +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin +from src.db.models.templates_.base import Base + + +class FlagURLAutoValidated( + Base, + URLDependentMixin, + CreatedAtMixin +): + + __tablename__ = 'flag_url_auto_validated' + __table_args__ = ( + PrimaryKeyConstraint( + "url_id" + ), + ) \ No newline at end of file diff --git a/src/db/models/impl/flag/url_validated/enums.py b/src/db/models/impl/flag/url_validated/enums.py index 1dda4a69..7c410493 100644 --- a/src/db/models/impl/flag/url_validated/enums.py +++ b/src/db/models/impl/flag/url_validated/enums.py @@ -1,8 +1,7 @@ from enum import Enum -# TODO (SM422): Rename to URLType -class URLValidatedType(Enum): +class URLType(Enum): DATA_SOURCE = "data source" META_URL = "meta url" NOT_RELEVANT = "not relevant" diff --git a/src/db/models/impl/flag/url_validated/pydantic.py b/src/db/models/impl/flag/url_validated/pydantic.py index 197c05a0..a8bd5b42 100644 --- a/src/db/models/impl/flag/url_validated/pydantic.py +++ b/src/db/models/impl/flag/url_validated/pydantic.py @@ -1,4 +1,4 @@ -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.templates.markers.bulk.insert import BulkInsertableModel from src.db.templates.markers.bulk.upsert import BulkUpsertableModel @@ -11,7 +11,7 @@ class FlagURLValidatedPydantic( ): url_id: int - type: URLValidatedType + type: URLType @classmethod def sa_model(cls) -> type_[FlagURLValidated]: diff --git a/src/db/models/impl/flag/url_validated/sqlalchemy.py b/src/db/models/impl/flag/url_validated/sqlalchemy.py index f6d4e770..97abf056 100644 --- a/src/db/models/impl/flag/url_validated/sqlalchemy.py +++ b/src/db/models/impl/flag/url_validated/sqlalchemy.py @@ -1,7 +1,7 @@ from 
sqlalchemy import PrimaryKeyConstraint from src.db.models.helpers import enum_column -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, UpdatedAtMixin from src.db.models.templates_.base import Base @@ -20,6 +20,6 @@ class FlagURLValidated( ) type = enum_column( - enum_type=URLValidatedType, - name="validated_url_type", + enum_type=URLType, + name="url_type", ) diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index 66bb3547..6caa216e 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -59,19 +59,16 @@ class URL(UpdatedAtMixin, CreatedAtMixin, WithIDBase): auto_location_subtasks = relationship( AutoLocationIDSubtask ) - # TODO (SM422): Remove uselist=False, pluralize - user_agency_suggestion = relationship( - "UserUrlAgencySuggestion", uselist=False, back_populates="url") + user_agency_suggestions = relationship( + "UserUrlAgencySuggestion", back_populates="url") auto_record_type_suggestion = relationship( "AutoRecordTypeSuggestion", uselist=False, back_populates="url") - # TODO (SM422): Remove uselist=False, pluralize - user_record_type_suggestion = relationship( - "UserRecordTypeSuggestion", uselist=False, back_populates="url") + user_record_type_suggestions = relationship( + "UserRecordTypeSuggestion", back_populates="url") auto_relevant_suggestion = relationship( "AutoRelevantSuggestion", uselist=False, back_populates="url") - # TODO (SM422): Remove uselist=False, pluralize - user_relevant_suggestion = relationship( - "UserRelevantSuggestion", uselist=False, back_populates="url") + user_relevant_suggestions = relationship( + "UserURLTypeSuggestion", back_populates="url") reviewing_user = relationship( "ReviewingUserURL", uselist=False, back_populates="url") optional_data_source_metadata = relationship( diff 
--git a/src/db/models/impl/url/suggestion/agency/user.py b/src/db/models/impl/url/suggestion/agency/user.py index 7a338fd0..f7c43aad 100644 --- a/src/db/models/impl/url/suggestion/agency/user.py +++ b/src/db/models/impl/url/suggestion/agency/user.py @@ -1,5 +1,5 @@ from sqlalchemy import Column, Boolean, UniqueConstraint, Integer -from sqlalchemy.orm import relationship +from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import get_agency_id_foreign_column from src.db.models.mixins import URLDependentMixin @@ -9,12 +9,12 @@ class UserUrlAgencySuggestion(URLDependentMixin, WithIDBase): __tablename__ = "user_url_agency_suggestions" - agency_id = get_agency_id_foreign_column(nullable=True) + agency_id: Mapped[int] = get_agency_id_foreign_column(nullable=True) user_id = Column(Integer, nullable=False) is_new = Column(Boolean, nullable=True) agency = relationship("Agency", back_populates="user_suggestions") - url = relationship("URL", back_populates="user_agency_suggestion") + url = relationship("URL", back_populates="user_agency_suggestions") __table_args__ = ( UniqueConstraint("agency_id", "url_id", "user_id", name="uq_user_url_agency_suggestions"), diff --git a/src/db/models/impl/url/suggestion/record_type/user.py b/src/db/models/impl/url/suggestion/record_type/user.py index 8fcc816b..5b9dde8c 100644 --- a/src/db/models/impl/url/suggestion/record_type/user.py +++ b/src/db/models/impl/url/suggestion/record_type/user.py @@ -19,4 +19,4 @@ class UserRecordTypeSuggestion(UpdatedAtMixin, CreatedAtMixin, URLDependentMixin # Relationships - url = relationship("URL", back_populates="user_record_type_suggestion") + url = relationship("URL", back_populates="user_record_type_suggestions") diff --git a/src/db/models/impl/url/suggestion/relevant/user.py b/src/db/models/impl/url/suggestion/relevant/user.py index a0cfed44..c7070b5e 100644 --- a/src/db/models/impl/url/suggestion/relevant/user.py +++ b/src/db/models/impl/url/suggestion/relevant/user.py @@ -1,28 
+1,25 @@ from sqlalchemy import Column, UniqueConstraint, Integer from sqlalchemy.dialects import postgresql -from sqlalchemy.orm import relationship +from sqlalchemy.orm import relationship, Mapped +from src.db.models.helpers import enum_column +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.mixins import UpdatedAtMixin, CreatedAtMixin, URLDependentMixin from src.db.models.templates_.with_id import WithIDBase -class UserRelevantSuggestion( +class UserURLTypeSuggestion( UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, WithIDBase ): - __tablename__ = "user_relevant_suggestions" + __tablename__ = "user_url_type_suggestions" user_id = Column(Integer, nullable=False) - suggested_status = Column( - postgresql.ENUM( - 'relevant', - 'not relevant', - 'individual record', - 'broken page/404 not found', - name='suggested_status' - ), + type: Mapped[URLType | None] = enum_column( + URLType, + name="url_type", nullable=True ) @@ -32,4 +29,4 @@ class UserRelevantSuggestion( # Relationships - url = relationship("URL", back_populates="user_relevant_suggestion") + url = relationship("URL", back_populates="user_relevant_suggestions") diff --git a/src/db/models/views/meta_url.py b/src/db/models/views/meta_url.py index bc963e11..20437075 100644 --- a/src/db/models/views/meta_url.py +++ b/src/db/models/views/meta_url.py @@ -1,7 +1,7 @@ """ CREATE OR REPLACE VIEW meta_url_view AS SELECT - urls.id + urls.id as url_id FROM urls INNER JOIN flag_url_validated fuv on fuv.url_id = urls.id where fuv.type = 'meta url' diff --git a/src/db/models/views/unvalidated_url.py b/src/db/models/views/unvalidated_url.py index 767ee960..bcfa9293 100644 --- a/src/db/models/views/unvalidated_url.py +++ b/src/db/models/views/unvalidated_url.py @@ -1,4 +1,5 @@ """ +CREATE OR REPLACE VIEW unvalidated_url_view AS select u.id as url_id from diff --git a/src/db/models/views/url_anno_count.py b/src/db/models/views/url_anno_count.py new file mode 100644 index 
00000000..9a966718 --- /dev/null +++ b/src/db/models/views/url_anno_count.py @@ -0,0 +1,124 @@ +""" + CREATE OR REPLACE VIEW url_annotation_count AS + with auto_location_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.auto_location_id_subtasks anno on u.id = anno.url_id + group by u.id +) +, auto_agency_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.url_auto_agency_id_subtasks anno on u.id = anno.url_id + group by u.id +) +, auto_url_type_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.auto_relevant_suggestions anno on u.id = anno.url_id + group by u.id +) +, auto_record_type_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.auto_record_type_suggestions anno on u.id = anno.url_id + group by u.id +) +, user_location_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.user_location_suggestions anno on u.id = anno.url_id + group by u.id +) +, user_agency_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.user_url_agency_suggestions anno on u.id = anno.url_id + group by u.id +) +, user_url_type_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.user_url_type_suggestions anno on u.id = anno.url_id + group by u.id + ) +, user_record_type_count as ( + select + u.id, + count(anno.url_id) as cnt + from urls u + inner join public.user_record_type_suggestions anno on u.id = anno.url_id + group by u.id +) +select + u.id as url_id, + coalesce(auto_ag.cnt, 0) as auto_agency_count, + coalesce(auto_loc.cnt, 0) as auto_location_count, + coalesce(auto_rec.cnt, 0) as auto_record_type_count, + coalesce(auto_typ.cnt, 0) as auto_url_type_count, + coalesce(user_ag.cnt, 0) as user_agency_count, + coalesce(user_loc.cnt, 0) as user_location_count, + coalesce(user_rec.cnt, 0) as user_record_type_count, + 
coalesce(user_typ.cnt, 0) as user_url_type_count, + ( + coalesce(auto_ag.cnt, 0) + + coalesce(auto_loc.cnt, 0) + + coalesce(auto_rec.cnt, 0) + + coalesce(auto_typ.cnt, 0) + + coalesce(user_ag.cnt, 0) + + coalesce(user_loc.cnt, 0) + + coalesce(user_rec.cnt, 0) + + coalesce(user_typ.cnt, 0) + ) as total_anno_count + + from urls u + left join auto_agency_count auto_ag on auto_ag.id = u.id + left join auto_location_count auto_loc on auto_loc.id = u.id + left join auto_record_type_count auto_rec on auto_rec.id = u.id + left join auto_url_type_count auto_typ on auto_typ.id = u.id + left join user_agency_count user_ag on user_ag.id = u.id + left join user_location_count user_loc on user_loc.id = u.id + left join user_record_type_count user_rec on user_rec.id = u.id + left join user_url_type_count user_typ on user_typ.id = u.id +""" +from sqlalchemy import PrimaryKeyConstraint, Column, Integer + +from src.db.models.mixins import ViewMixin, URLDependentMixin +from src.db.models.templates_.base import Base + + +class URLAnnotationCount( + Base, + ViewMixin, + URLDependentMixin +): + + __tablename__ = "url_annotation_count_view" + __table_args__ = ( + PrimaryKeyConstraint("url_id"), + {"info": "view"} + ) + + auto_agency_count = Column(Integer, nullable=False) + auto_location_count = Column(Integer, nullable=False) + auto_record_type_count = Column(Integer, nullable=False) + auto_url_type_count = Column(Integer, nullable=False) + user_agency_count = Column(Integer, nullable=False) + user_location_count = Column(Integer, nullable=False) + user_record_type_count = Column(Integer, nullable=False) + user_url_type_count = Column(Integer, nullable=False) + total_anno_count = Column(Integer, nullable=False) \ No newline at end of file diff --git a/src/db/queries/implementations/core/common/annotation_exists_/constants.py b/src/db/queries/implementations/core/common/annotation_exists_/constants.py index ead32bc0..1237634e 100644 --- 
a/src/db/queries/implementations/core/common/annotation_exists_/constants.py +++ b/src/db/queries/implementations/core/common/annotation_exists_/constants.py @@ -3,13 +3,13 @@ from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion ALL_ANNOTATION_MODELS = [ AutoRecordTypeSuggestion, AutoRelevantSuggestion, URLAutoAgencyIDSubtask, - UserRelevantSuggestion, + UserURLTypeSuggestion, UserRecordTypeSuggestion, UserUrlAgencySuggestion ] diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py index 634cf419..ab341cb3 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py @@ -3,7 +3,7 @@ from src.collectors.enums import URLStatus, CollectorType from src.core.enums import BatchStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/not_relevant.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/not_relevant.py index e84f597b..3fba94ee 100644 --- 
a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/not_relevant.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/not_relevant.py @@ -1,7 +1,7 @@ from sqlalchemy import select, func from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL @@ -26,7 +26,7 @@ FlagURLValidated.url_id == URL.id, ) .where( - FlagURLValidated.type == URLValidatedType.NOT_RELEVANT + FlagURLValidated.type == URLType.NOT_RELEVANT ) .group_by( Batch.id diff --git a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py index 5d69be2a..17136cce 100644 --- a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py +++ b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py @@ -8,7 +8,7 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.models.mixins import URLDependentMixin from src.db.queries.base.builder import QueryBuilderBase from src.db.queries.implementations.core.common.annotation_exists_.core import AnnotationExistsCTEQueryBuilder @@ -17,7 +17,7 @@ class PendingAnnotationExistsCTEQueryBuilder(AnnotationExistsCTEQueryBuilder): @property def has_user_relevant_annotation(self): - return 
self.get_exists_for_model(UserRelevantSuggestion) + return self.get_exists_for_model(UserURLTypeSuggestion) @property def has_user_record_type_annotation(self): diff --git a/src/db/types.py b/src/db/types.py index 3c24919b..dcee196f 100644 --- a/src/db/types.py +++ b/src/db/types.py @@ -2,9 +2,9 @@ from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.queries.base.labels import LabelsBase -UserSuggestionType = UserUrlAgencySuggestion | UserRelevantSuggestion | UserRecordTypeSuggestion +UserSuggestionType = UserUrlAgencySuggestion | UserURLTypeSuggestion | UserRecordTypeSuggestion LabelsType = TypeVar("LabelsType", bound=LabelsBase) \ No newline at end of file diff --git a/tests/automated/integration/api/_helpers/RequestValidator.py b/tests/automated/integration/api/_helpers/RequestValidator.py index 7d0dc641..f2d68046 100644 --- a/tests/automated/integration/api/_helpers/RequestValidator.py +++ b/tests/automated/integration/api/_helpers/RequestValidator.py @@ -5,14 +5,8 @@ from pydantic import BaseModel from starlette.testclient import TestClient -from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAnnotationResponse -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.api.endpoints.annotate.dtos.record_type.post import RecordTypeAnnotationPostInfo -from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseOuterInfo -from src.api.endpoints.annotate.relevance.get.dto import 
GetNextRelevanceAnnotationResponseOuterInfo -from src.api.endpoints.annotate.relevance.post.dto import RelevanceAnnotationPostInfo from src.api.endpoints.batch.dtos.get.logs import GetBatchLogsResponse from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary @@ -33,13 +27,13 @@ from src.api.endpoints.review.reject.dto import FinalReviewRejectionInfo from src.api.endpoints.search.dtos.response import SearchURLResponse from src.api.endpoints.task.by_id.dto import TaskInfo -from src.api.endpoints.task.dtos.get.tasks import GetTasksResponse from src.api.endpoints.task.dtos.get.task_status import GetTaskStatusResponseInfo +from src.api.endpoints.task.dtos.get.tasks import GetTasksResponse from src.api.endpoints.url.get.dto import GetURLsResponseInfo -from src.db.enums import TaskType -from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.collectors.enums import CollectorType +from src.collectors.impl.example.dtos.input import ExampleInputDTO from src.core.enums import BatchStatus +from src.db.enums import TaskType from src.util.helper_functions import update_if_not_none @@ -250,57 +244,6 @@ def abort_batch(self, batch_id: int) -> MessageResponse: ) return MessageResponse(**data) - def get_next_relevance_annotation(self) -> GetNextRelevanceAnnotationResponseOuterInfo: - data = self.get( - url=f"/annotate/relevance" - ) - return GetNextRelevanceAnnotationResponseOuterInfo(**data) - - def get_next_record_type_annotation(self) -> GetNextRecordTypeAnnotationResponseOuterInfo: - data = self.get( - url=f"/annotate/record-type" - ) - return GetNextRecordTypeAnnotationResponseOuterInfo(**data) - - def post_record_type_annotation_and_get_next( - self, - url_id: int, - record_type_annotation_post_info: RecordTypeAnnotationPostInfo - ) -> GetNextRecordTypeAnnotationResponseOuterInfo: - data = self.post_v2( - url=f"/annotate/record-type/{url_id}", - 
json=record_type_annotation_post_info.model_dump(mode='json') - ) - return GetNextRecordTypeAnnotationResponseOuterInfo(**data) - - def post_relevance_annotation_and_get_next( - self, - url_id: int, - relevance_annotation_post_info: RelevanceAnnotationPostInfo - ) -> GetNextRelevanceAnnotationResponseOuterInfo: - data = self.post_v2( - url=f"/annotate/relevance/{url_id}", - json=relevance_annotation_post_info.model_dump(mode='json') - ) - return GetNextRelevanceAnnotationResponseOuterInfo(**data) - - async def get_next_agency_annotation(self) -> GetNextURLForAgencyAnnotationResponse: - data = self.get( - url=f"/annotate/agency" - ) - return GetNextURLForAgencyAnnotationResponse(**data) - - async def post_agency_annotation_and_get_next( - self, - url_id: int, - agency_annotation_post_info: URLAgencyAnnotationPostInfo - ) -> GetNextURLForAgencyAnnotationResponse: - data = self.post( - url=f"/annotate/agency/{url_id}", - json=agency_annotation_post_info.model_dump(mode='json') - ) - return GetNextURLForAgencyAnnotationResponse(**data) - def get_urls(self, page: int = 1, errors: bool = False) -> GetURLsResponseInfo: data = self.get( url=f"/url", diff --git a/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions.py b/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions.py deleted file mode 100644 index 65b20b0c..00000000 --- a/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest - -from src.core.enums import SuggestionType -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo - - -@pytest.mark.asyncio -async def test_annotate_agency_multiple_auto_suggestions(api_test_helper): - """ - Test Scenario: Multiple Auto Suggestions - A URL has multiple Agency Auto Suggestion and has not been annotated by the User - The user should receive all of the auto suggestions with full detail - """ - ath = api_test_helper - 
buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1, - with_html_content=True - ) - await ath.db_data_creator.auto_suggestions( - url_ids=buci.url_ids, - num_suggestions=2, - suggestion_type=SuggestionType.AUTO_SUGGESTION - ) - - # User requests next annotation - response = await ath.request_validator.get_next_agency_annotation() - - assert response.next_annotation - next_annotation = response.next_annotation - # Check that url_id matches the one we inserted - assert next_annotation.url_info.url_id == buci.url_ids[0] - - # Check that html data is present - assert next_annotation.html_info.description != "" - assert next_annotation.html_info.title != "" - - # Check that two agency_suggestions exist - assert len(next_annotation.agency_suggestions) == 2 - - for agency_suggestion in next_annotation.agency_suggestions: - assert agency_suggestion.suggestion_type == SuggestionType.AUTO_SUGGESTION - assert agency_suggestion.pdap_agency_id is not None - assert agency_suggestion.agency_name is not None - assert agency_suggestion.state is not None - assert agency_suggestion.county is not None - assert agency_suggestion.locality is not None diff --git a/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions_no_html.py b/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions_no_html.py deleted file mode 100644 index 5bcb4569..00000000 --- a/tests/automated/integration/api/annotate/agency/test_multiple_auto_suggestions_no_html.py +++ /dev/null @@ -1,35 +0,0 @@ -import pytest - -from src.core.enums import SuggestionType -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo - - -@pytest.mark.asyncio -async def test_annotate_agency_multiple_auto_suggestions_no_html(api_test_helper): - """ - Test Scenario: Multiple Auto Suggestions - A URL has multiple Agency Auto Suggestion and has not been annotated by the User - The user should receive all of the auto suggestions 
with full detail - """ - ath = api_test_helper - buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1, - with_html_content=False - ) - await ath.db_data_creator.auto_suggestions( - url_ids=buci.url_ids, - num_suggestions=2, - suggestion_type=SuggestionType.AUTO_SUGGESTION - ) - - # User requests next annotation - response = await ath.request_validator.get_next_agency_annotation() - - assert response.next_annotation - next_annotation = response.next_annotation - # Check that url_id matches the one we inserted - assert next_annotation.url_info.url_id == buci.url_ids[0] - - # Check that html data is not present - assert next_annotation.html_info.description == "" - assert next_annotation.html_info.title == "" diff --git a/tests/automated/integration/api/annotate/agency/test_other_user_annotation.py b/tests/automated/integration/api/annotate/agency/test_other_user_annotation.py deleted file mode 100644 index a3ecae79..00000000 --- a/tests/automated/integration/api/annotate/agency/test_other_user_annotation.py +++ /dev/null @@ -1,44 +0,0 @@ -import pytest - -from tests.automated.integration.api.conftest import MOCK_USER_ID -from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency -from tests.helpers.setup.annotate_agency.model import AnnotateAgencySetupInfo - - -@pytest.mark.asyncio -async def test_annotate_agency_other_user_annotation(api_test_helper): - """ - Test Scenario: Other User Annotation - A URL has been annotated by another User - Our user should still receive this URL to annotate - """ - ath = api_test_helper - setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( - db_data_creator=ath.db_data_creator, - url_count=1 - ) - url_ids = setup_info.url_ids - - response = await ath.request_validator.get_next_agency_annotation() - - assert response.next_annotation - next_annotation = response.next_annotation - # Check that url_id matches the one we inserted - assert 
next_annotation.url_info.url_id == url_ids[0] - - # Check that html data is present - assert next_annotation.html_info.description != "" - assert next_annotation.html_info.title != "" - - # Check that one agency_suggestion exists - assert len(next_annotation.agency_suggestions) == 1 - - # Test that another user can insert a suggestion - await ath.db_data_creator.manual_suggestion( - user_id=MOCK_USER_ID + 1, - url_id=url_ids[0], - ) - - # After this, text that our user does not receive this URL - response = await ath.request_validator.get_next_agency_annotation() - assert response.next_annotation is None diff --git a/tests/automated/integration/api/annotate/agency/test_single_confirmed_agency.py b/tests/automated/integration/api/annotate/agency/test_single_confirmed_agency.py deleted file mode 100644 index e38421e1..00000000 --- a/tests/automated/integration/api/annotate/agency/test_single_confirmed_agency.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo - - -@pytest.mark.asyncio -async def test_annotate_agency_single_confirmed_agency(api_test_helper): - """ - Test Scenario: Single Confirmed Agency - A URL has a single Confirmed Agency and has not been annotated by the User - The user should not receive this URL to annotate - """ - ath = api_test_helper - buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1, - with_html_content=True - ) - await ath.db_data_creator.confirmed_suggestions( - url_ids=buci.url_ids, - ) - response = await ath.request_validator.get_next_agency_annotation() - assert response.next_annotation is None diff --git a/tests/automated/integration/api/annotate/agency/test_single_unknown_auto_suggestions.py b/tests/automated/integration/api/annotate/agency/test_single_unknown_auto_suggestions.py deleted file mode 100644 index f911bba5..00000000 --- 
a/tests/automated/integration/api/annotate/agency/test_single_unknown_auto_suggestions.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest - -from src.core.enums import SuggestionType -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo - - -@pytest.mark.asyncio -async def test_annotate_agency_single_unknown_auto_suggestion(api_test_helper): - """ - Test Scenario: Single Unknown Auto Suggestion - A URL has a single Unknown Agency Auto Suggestion and has not been annotated by the User - The user should receive a single Unknown Auto Suggestion lacking other detail - """ - ath = api_test_helper - buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1, - with_html_content=True - ) - await ath.db_data_creator.auto_suggestions( - url_ids=buci.url_ids, - num_suggestions=1, - suggestion_type=SuggestionType.UNKNOWN - ) - response = await ath.request_validator.get_next_agency_annotation() - - assert response.next_annotation - next_annotation = response.next_annotation - # Check that url_id matches the one we inserted - assert next_annotation.url_info.url_id == buci.url_ids[0] - - # Check that html data is present - assert next_annotation.html_info.description != "" - assert next_annotation.html_info.title != "" - - # Check that one agency_suggestion exists - assert len(next_annotation.agency_suggestions) == 1 - - agency_suggestion = next_annotation.agency_suggestions[0] - - assert agency_suggestion.suggestion_type == SuggestionType.UNKNOWN - assert agency_suggestion.pdap_agency_id is None - assert agency_suggestion.agency_name is None - assert agency_suggestion.state is None - assert agency_suggestion.county is None - assert agency_suggestion.locality is None diff --git a/tests/automated/integration/api/annotate/agency/test_submit_and_get_next.py b/tests/automated/integration/api/annotate/agency/test_submit_and_get_next.py deleted file mode 100644 index 91049daa..00000000 --- 
a/tests/automated/integration/api/annotate/agency/test_submit_and_get_next.py +++ /dev/null @@ -1,42 +0,0 @@ -import pytest - -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency -from tests.helpers.setup.annotate_agency.model import AnnotateAgencySetupInfo - - -@pytest.mark.asyncio -async def test_annotate_agency_submit_and_get_next(api_test_helper): - """ - Test Scenario: Submit and Get Next (no other URL available) - A URL has been annotated by our User, and no other valid URLs have not been annotated - Our user should not receive another URL to annotate - Until another relevant URL is added - """ - ath = api_test_helper - setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( - db_data_creator=ath.db_data_creator, - url_count=2 - ) - url_ids = setup_info.url_ids - - # User should submit an annotation and receive the next - response = await ath.request_validator.post_agency_annotation_and_get_next( - url_id=url_ids[0], - agency_annotation_post_info=URLAgencyAnnotationPostInfo( - suggested_agency=await ath.db_data_creator.agency(), - is_new=False - ) - - ) - assert response.next_annotation is not None - - # User should submit this annotation and receive none for the next - response = await ath.request_validator.post_agency_annotation_and_get_next( - url_id=url_ids[1], - agency_annotation_post_info=URLAgencyAnnotationPostInfo( - suggested_agency=await ath.db_data_creator.agency(), - is_new=False - ) - ) - assert response.next_annotation is None diff --git a/tests/automated/integration/api/annotate/agency/test_submit_new.py b/tests/automated/integration/api/annotate/agency/test_submit_new.py deleted file mode 100644 index e82c767f..00000000 --- a/tests/automated/integration/api/annotate/agency/test_submit_new.py +++ /dev/null @@ -1,38 +0,0 @@ -import pytest - -from src.api.endpoints.annotate.agency.post.dto import 
URLAgencyAnnotationPostInfo -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion -from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency -from tests.helpers.setup.annotate_agency.model import AnnotateAgencySetupInfo - - -@pytest.mark.asyncio -async def test_annotate_agency_submit_new(api_test_helper): - """ - Test Scenario: Submit New - Our user receives an annotation and marks it as `NEW` - This should complete successfully - And within the database the annotation should be marked as `NEW` - """ - ath = api_test_helper - adb_client = ath.adb_client() - setup_info: AnnotateAgencySetupInfo = await setup_for_annotate_agency( - db_data_creator=ath.db_data_creator, - url_count=1 - ) - url_ids = setup_info.url_ids - - # User should submit an annotation and mark it as New - response = await ath.request_validator.post_agency_annotation_and_get_next( - url_id=url_ids[0], - agency_annotation_post_info=URLAgencyAnnotationPostInfo( - suggested_agency=await ath.db_data_creator.agency(), - is_new=True - ) - ) - assert response.next_annotation is None - - # Within database, the annotation should be marked as `NEW` - all_manual_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) - assert len(all_manual_suggestions) == 1 - assert all_manual_suggestions[0].is_new diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index c50127a3..f3f17126 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -1,17 +1,15 @@ -from collections import Counter - import pytest -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationUserSuggestion from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from 
src.api.endpoints.annotate.all.get.queries.core import GetNextURLForAllAnnotationQueryBuilder from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.core.enums import SuggestedStatus, RecordType +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @@ -57,12 +55,9 @@ async def test_annotate_all( post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( url_id=url_mapping_1.url_id, all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=SuggestedStatus.RELEVANT, + suggested_status=URLType.DATA_SOURCE, record_type=RecordType.ACCIDENT_REPORTS, - agency=URLAgencyAnnotationPostInfo( - is_new=False, - suggested_agency=agency_id - ), + agency_ids=[agency_id], location_ids=[ california.location_id, pennsylvania.location_id, @@ -78,8 +73,9 @@ async def test_annotate_all( post_response_2 = await ath.request_validator.post_all_annotations_and_get_next( url_id=url_mapping_2.url_id, all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=SuggestedStatus.NOT_RELEVANT, - location_ids=[] + suggested_status=URLType.NOT_RELEVANT, + location_ids=[], + agency_ids=[] ) ) assert post_response_2.next_annotation is None @@ -91,15 +87,15 @@ async def test_annotate_all( # Check that all annotations are present in the database # Should be two relevance 
annotations, one True and one False - all_relevance_suggestions: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) + all_relevance_suggestions: list[UserURLTypeSuggestion] = await adb_client.get_all(UserURLTypeSuggestion) assert len(all_relevance_suggestions) == 2 - assert all_relevance_suggestions[0].suggested_status == SuggestedStatus.RELEVANT.value - assert all_relevance_suggestions[1].suggested_status == SuggestedStatus.NOT_RELEVANT.value + assert all_relevance_suggestions[0].type == URLType.DATA_SOURCE + assert all_relevance_suggestions[1].type == URLType.NOT_RELEVANT # Should be one agency all_agency_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) assert len(all_agency_suggestions) == 1 - assert all_agency_suggestions[0].is_new == False + assert all_agency_suggestions[0].is_new is None assert all_agency_suggestions[0].agency_id == agency_id # Should be one record type diff --git a/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py b/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py index 7a1d0578..bfeccc6b 100644 --- a/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py +++ b/tests/automated/integration/api/annotate/all/test_post_batch_filtering.py @@ -1,8 +1,7 @@ import pytest -from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.core.enums import SuggestedStatus, RecordType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @@ -30,12 +29,9 @@ async def test_annotate_all_post_batch_filtering(api_test_helper): url_id=url_mapping_1.url_id, batch_id=setup_info_3.batch_id, all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=SuggestedStatus.RELEVANT, - record_type=RecordType.ACCIDENT_REPORTS, - 
agency=URLAgencyAnnotationPostInfo( - is_new=True - ), - location_ids=[] + suggested_status=URLType.NOT_RELEVANT, + location_ids=[], + agency_ids=[] ) ) diff --git a/tests/automated/integration/api/annotate/all/test_validation_error.py b/tests/automated/integration/api/annotate/all/test_validation_error.py index e9f8702f..9c6e244b 100644 --- a/tests/automated/integration/api/annotate/all/test_validation_error.py +++ b/tests/automated/integration/api/annotate/all/test_validation_error.py @@ -1,8 +1,9 @@ import pytest from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.core.enums import SuggestedStatus, RecordType +from src.core.enums import RecordType from src.core.exceptions import FailedValidationException +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @@ -21,8 +22,9 @@ async def test_annotate_all_validation_error(api_test_helper): response = await ath.request_validator.post_all_annotations_and_get_next( url_id=url_mapping_1.url_id, all_annotations_post_info=AllAnnotationPostInfo( - suggested_status=SuggestedStatus.NOT_RELEVANT, + suggested_status=URLType.NOT_RELEVANT, record_type=RecordType.ACCIDENT_REPORTS, - location_ids=[] + location_ids=[], + agency_ids=[] ) ) diff --git a/tests/automated/integration/api/annotate/record_type/test_record_type.py b/tests/automated/integration/api/annotate/record_type/test_record_type.py deleted file mode 100644 index 5e6d8917..00000000 --- a/tests/automated/integration/api/annotate/record_type/test_record_type.py +++ /dev/null @@ -1,166 +0,0 @@ -from http import HTTPStatus - -import pytest -from fastapi import HTTPException - -from src.api.endpoints.annotate.dtos.record_type.post import RecordTypeAnnotationPostInfo -from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseOuterInfo -from src.core.enums import RecordType -from 
src.core.error_manager.enums import ErrorTypes -from src.db.dtos.url.insert import InsertURLsInfo -from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion -from tests.automated.integration.api.annotate.helpers import check_url_mappings_match, check_html_info_not_empty, \ - html_info_empty -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo - - -@pytest.mark.asyncio -async def test_annotate_record_type(api_test_helper): - ath = api_test_helper - - batch_id = ath.db_data_creator.batch() - - # Create 2 URLs with outcome `pending` - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) - - url_1 = iui.url_mappings[0] - url_2 = iui.url_mappings[1] - - # Add record type attribute with value `Accident Reports` to 1st URL - await ath.db_data_creator.auto_record_type_suggestions( - url_id=url_1.url_id, - record_type=RecordType.ACCIDENT_REPORTS - ) - - # Add 'Record Type' attribute with value `Dispatch Recordings` to 2nd URL - await ath.db_data_creator.auto_record_type_suggestions( - url_id=url_2.url_id, - record_type=RecordType.DISPATCH_RECORDINGS - ) - - # Add HTML data to both - await ath.db_data_creator.html_data([url_1.url_id, url_2.url_id]) - - # Call `GET` `/annotate/record-type` and receive next URL - request_info_1: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() - inner_info_1 = request_info_1.next_annotation - - check_url_mappings_match(inner_info_1.url_info, url_1) - check_html_info_not_empty(inner_info_1.html_info) - - # Validate that the correct record type is returned - assert inner_info_1.suggested_record_type == RecordType.ACCIDENT_REPORTS - - # Annotate with value 'Personnel Records' and get next URL - request_info_2: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( - url_id=inner_info_1.url_info.url_id, - 
record_type_annotation_post_info=RecordTypeAnnotationPostInfo( - record_type=RecordType.PERSONNEL_RECORDS - ) - ) - - inner_info_2 = request_info_2.next_annotation - - check_url_mappings_match(inner_info_2.url_info, url_2) - check_html_info_not_empty(inner_info_2.html_info) - - request_info_3: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( - url_id=inner_info_2.url_info.url_id, - record_type_annotation_post_info=RecordTypeAnnotationPostInfo( - record_type=RecordType.ANNUAL_AND_MONTHLY_REPORTS - ) - ) - - assert request_info_3.next_annotation is None - - # Get all URL annotations. Confirm they exist for user - adb_client = ath.adb_client() - results: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) - result_1 = results[0] - result_2 = results[1] - - assert result_1.url_id == inner_info_1.url_info.url_id - assert result_1.record_type == RecordType.PERSONNEL_RECORDS.value - - assert result_2.url_id == inner_info_2.url_info.url_id - assert result_2.record_type == RecordType.ANNUAL_AND_MONTHLY_REPORTS.value - - # If user submits annotation for same URL, the URL should be overwritten - - request_info_4: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.post_record_type_annotation_and_get_next( - url_id=inner_info_1.url_info.url_id, - record_type_annotation_post_info=RecordTypeAnnotationPostInfo( - record_type=RecordType.BOOKING_REPORTS - ) - ) - - assert request_info_4.next_annotation is None - - results: list[UserRecordTypeSuggestion] = await adb_client.get_all(UserRecordTypeSuggestion) - assert len(results) == 2 - - for result in results: - if result.url_id == inner_info_1.url_info.url_id: - assert result.record_type == RecordType.BOOKING_REPORTS.value - - -@pytest.mark.asyncio -async def test_annotate_record_type_already_annotated_by_different_user( - api_test_helper -): - ath = api_test_helper - - creation_info: 
BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1 - ) - - await ath.db_data_creator.user_record_type_suggestion( - url_id=creation_info.url_ids[0], - user_id=2, - record_type=RecordType.ACCIDENT_REPORTS - ) - - # Annotate with different user (default is 1) and get conflict error - try: - response = await ath.request_validator.post_record_type_annotation_and_get_next( - url_id=creation_info.url_ids[0], - record_type_annotation_post_info=RecordTypeAnnotationPostInfo( - record_type=RecordType.ANNUAL_AND_MONTHLY_REPORTS - ) - ) - except HTTPException as e: - assert e.status_code == HTTPStatus.CONFLICT - assert e.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value - assert e.detail["detail"]["message"] == f"Annotation of type RECORD_TYPE already exists for url {creation_info.url_ids[0]}" - - -@pytest.mark.asyncio -async def test_annotate_record_type_no_html_info(api_test_helper): - ath = api_test_helper - - batch_id = ath.db_data_creator.batch() - - # Create 2 URLs with outcome `pending` - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) - - url_1 = iui.url_mappings[0] - url_2 = iui.url_mappings[1] - - # Add record type attribute with value `Accident Reports` to 1st URL - await ath.db_data_creator.auto_record_type_suggestions( - url_id=url_1.url_id, - record_type=RecordType.ACCIDENT_REPORTS - ) - - # Add 'Record Type' attribute with value `Dispatch Recordings` to 2nd URL - await ath.db_data_creator.auto_record_type_suggestions( - url_id=url_2.url_id, - record_type=RecordType.DISPATCH_RECORDINGS - ) - - # Call `GET` `/annotate/record-type` and receive next URL - request_info_1: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() - inner_info_1 = request_info_1.next_annotation - - check_url_mappings_match(inner_info_1.url_info, url_1) - assert html_info_empty(inner_info_1.html_info) diff --git 
a/tests/automated/integration/api/annotate/relevancy/test_relevancy.py b/tests/automated/integration/api/annotate/relevancy/test_relevancy.py deleted file mode 100644 index 387d68c0..00000000 --- a/tests/automated/integration/api/annotate/relevancy/test_relevancy.py +++ /dev/null @@ -1,213 +0,0 @@ -from http import HTTPStatus - -import pytest -from fastapi import HTTPException - -from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseOuterInfo -from src.api.endpoints.annotate.relevance.post.dto import RelevanceAnnotationPostInfo -from src.core.enums import SuggestedStatus -from src.core.error_manager.enums import ErrorTypes -from src.db.dtos.url.insert import InsertURLsInfo -from src.db.models.impl.url.suggestion.relevant.user import UserRelevantSuggestion -from tests.automated.integration.api.annotate.helpers import check_url_mappings_match, check_html_info_not_empty, \ - html_info_empty -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo - - -@pytest.mark.asyncio -async def test_annotate_relevancy(api_test_helper): - ath = api_test_helper - - batch_id = ath.db_data_creator.batch() - - # Create 2 URLs with outcome `pending` - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) - - url_1 = iui.url_mappings[0] - url_2 = iui.url_mappings[1] - - # Add `Relevancy` attribute with value `True` to 1st URL - await ath.db_data_creator.auto_relevant_suggestions( - url_id=url_1.url_id, - relevant=True - ) - - # Add 'Relevancy' attribute with value `False` to 2nd URL - await ath.db_data_creator.auto_relevant_suggestions( - url_id=url_2.url_id, - relevant=False - ) - - # Add HTML data to both - await ath.db_data_creator.html_data([url_1.url_id, url_2.url_id]) - # Call `GET` `/annotate/relevance` and receive next URL - request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() - inner_info_1 = 
request_info_1.next_annotation - - check_url_mappings_match(inner_info_1.url_info, url_1) - check_html_info_not_empty(inner_info_1.html_info) - - # Validate that the correct relevant value is returned - assert inner_info_1.annotation.is_relevant is True - - # A second user should see the same URL - - - # Annotate with value 'False' and get next URL - request_info_2: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( - url_id=inner_info_1.url_info.url_id, - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=SuggestedStatus.NOT_RELEVANT - ) - ) - - inner_info_2 = request_info_2.next_annotation - - check_url_mappings_match( - inner_info_2.url_info, - url_2 - ) - check_html_info_not_empty(inner_info_2.html_info) - - request_info_3: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( - url_id=inner_info_2.url_info.url_id, - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=SuggestedStatus.RELEVANT - ) - ) - - assert request_info_3.next_annotation is None - - # Get all URL annotations. 
Confirm they exist for user - adb_client = ath.adb_client() - results: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) - result_1 = results[0] - result_2 = results[1] - - assert result_1.url_id == inner_info_1.url_info.url_id - assert result_1.suggested_status == SuggestedStatus.NOT_RELEVANT.value - - assert result_2.url_id == inner_info_2.url_info.url_id - assert result_2.suggested_status == SuggestedStatus.RELEVANT.value - - # If user submits annotation for same URL, the URL should be overwritten - request_info_4: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( - url_id=inner_info_1.url_info.url_id, - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=SuggestedStatus.RELEVANT - ) - ) - - assert request_info_4.next_annotation is None - - results: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) - assert len(results) == 2 - - for result in results: - if result.url_id == inner_info_1.url_info.url_id: - assert results[0].suggested_status == SuggestedStatus.RELEVANT.value - - -async def post_and_validate_relevancy_annotation(ath, url_id, annotation: SuggestedStatus): - response = ath.request_validator.post_relevance_annotation_and_get_next( - url_id=url_id, - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=annotation - ) - ) - - assert response.next_annotation is None - - results: list[UserRelevantSuggestion] = await ath.adb_client().get_all(UserRelevantSuggestion) - assert len(results) == 1 - assert results[0].suggested_status == annotation.value - - -@pytest.mark.asyncio -async def test_annotate_relevancy_broken_page(api_test_helper): - ath = api_test_helper - - creation_info = await ath.db_data_creator.batch_and_urls(url_count=1, with_html_content=False) - - await post_and_validate_relevancy_annotation( - ath, - url_id=creation_info.url_ids[0], - 
annotation=SuggestedStatus.BROKEN_PAGE_404 - ) - - -@pytest.mark.asyncio -async def test_annotate_relevancy_individual_record(api_test_helper): - ath = api_test_helper - - creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1 - ) - - await post_and_validate_relevancy_annotation( - ath, - url_id=creation_info.url_ids[0], - annotation=SuggestedStatus.INDIVIDUAL_RECORD - ) - - -@pytest.mark.asyncio -async def test_annotate_relevancy_already_annotated_by_different_user( - api_test_helper -): - ath = api_test_helper - - creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( - url_count=1 - ) - - await ath.db_data_creator.user_relevant_suggestion( - url_id=creation_info.url_ids[0], - user_id=2, - suggested_status=SuggestedStatus.RELEVANT - ) - - # Annotate with different user (default is 1) and get conflict error - try: - response = await ath.request_validator.post_relevance_annotation_and_get_next( - url_id=creation_info.url_ids[0], - relevance_annotation_post_info=RelevanceAnnotationPostInfo( - suggested_status=SuggestedStatus.NOT_RELEVANT - ) - ) - except HTTPException as e: - assert e.status_code == HTTPStatus.CONFLICT - assert e.detail["detail"]["code"] == ErrorTypes.ANNOTATION_EXISTS.value - assert e.detail["detail"]["message"] == f"Annotation of type RELEVANCE already exists for url {creation_info.url_ids[0]}" - - -@pytest.mark.asyncio -async def test_annotate_relevancy_no_html(api_test_helper): - ath = api_test_helper - - batch_id = ath.db_data_creator.batch() - - # Create 2 URLs with outcome `pending` - iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) - - url_1 = iui.url_mappings[0] - url_2 = iui.url_mappings[1] - - # Add `Relevancy` attribute with value `True` to 1st URL - await ath.db_data_creator.auto_relevant_suggestions( - url_id=url_1.url_id, - relevant=True - ) - - # Add 'Relevancy' attribute with value `False` to 2nd URL - await 
ath.db_data_creator.auto_relevant_suggestions( - url_id=url_2.url_id, - relevant=False - ) - - # Call `GET` `/annotate/relevance` and receive next URL - request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() - inner_info_1 = request_info_1.next_annotation - - check_url_mappings_match(inner_info_1.url_info, url_1) - assert html_info_empty(inner_info_1.html_info) diff --git a/tests/automated/integration/api/metrics/batches/test_aggregated.py b/tests/automated/integration/api/metrics/batches/test_aggregated.py index 4b7b4f75..090896e8 100644 --- a/tests/automated/integration/api/metrics/batches/test_aggregated.py +++ b/tests/automated/integration/api/metrics/batches/test_aggregated.py @@ -5,7 +5,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping import URLMapping from src.db.helpers.connect import get_postgres_connection_string -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.data_creator.create import create_batch, create_url_data_sources, create_urls, \ create_batch_url_links, create_validated_flags @@ -48,12 +48,12 @@ async def test_get_batches_aggregated_metrics( await create_validated_flags( adb_client=adb_client, url_ids=urls_validated + urls_submitted, - validation_type=URLValidatedType.DATA_SOURCE, + validation_type=URLType.DATA_SOURCE, ) await create_validated_flags( adb_client=adb_client, url_ids=urls_not_relevant, - validation_type=URLValidatedType.NOT_RELEVANT, + validation_type=URLType.NOT_RELEVANT, ) await create_url_data_sources( adb_client=adb_client, diff --git a/tests/automated/integration/api/metrics/batches/test_breakdown.py b/tests/automated/integration/api/metrics/batches/test_breakdown.py index 0657c66f..c6ef6e0b 100644 --- 
a/tests/automated/integration/api/metrics/batches/test_breakdown.py +++ b/tests/automated/integration/api/metrics/batches/test_breakdown.py @@ -7,7 +7,7 @@ from src.core.enums import BatchStatus from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.create import create_batch, create_urls, create_batch_url_links, create_validated_flags, \ create_url_data_sources @@ -32,7 +32,7 @@ async def test_get_batches_breakdown_metrics(api_test_helper): await create_validated_flags( adb_client=adb_client, url_ids=url_ids_1[:2], - validation_type=URLValidatedType.DATA_SOURCE + validation_type=URLType.DATA_SOURCE ) await create_url_data_sources( adb_client=adb_client, @@ -64,12 +64,12 @@ async def test_get_batches_breakdown_metrics(api_test_helper): await create_validated_flags( adb_client=adb_client, url_ids=validated_url_ids[:3], - validation_type=URLValidatedType.NOT_RELEVANT, + validation_type=URLType.NOT_RELEVANT, ) await create_validated_flags( adb_client=adb_client, url_ids=validated_url_ids[4:9], - validation_type=URLValidatedType.DATA_SOURCE, + validation_type=URLType.DATA_SOURCE, ) await create_batch_url_links( adb_client=adb_client, diff --git a/tests/automated/integration/api/metrics/test_backlog.py b/tests/automated/integration/api/metrics/test_backlog.py index e48db202..da8dccd6 100644 --- a/tests/automated/integration/api/metrics/test_backlog.py +++ b/tests/automated/integration/api/metrics/test_backlog.py @@ -1,14 +1,9 @@ import pendulum import pytest -from src.collectors.enums import CollectorType, URLStatus -from src.core.enums import SuggestedStatus +from src.collectors.enums import URLStatus from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.flag.url_validated.enums import URLValidatedType -from 
tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo -from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters -from tests.helpers.batch_creation_parameters.enums import URLCreationEnum -from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.core import DBDataCreator @@ -31,7 +26,7 @@ async def test_get_backlog_metrics(api_test_helper): submitted_url_ids_1: list[int] = url_ids_1[:2] await ddc.create_validated_flags( url_ids=submitted_url_ids_1, - validation_type=URLValidatedType.DATA_SOURCE + validation_type=URLType.DATA_SOURCE ) await ddc.create_url_data_sources(url_ids=submitted_url_ids_1) @@ -49,7 +44,7 @@ async def test_get_backlog_metrics(api_test_helper): await ddc.create_batch_url_links(url_ids=not_relevant_url_ids_2, batch_id=batch_2_id) await ddc.create_validated_flags( url_ids=not_relevant_url_ids_2[:4], - validation_type=URLValidatedType.NOT_RELEVANT + validation_type=URLType.NOT_RELEVANT ) error_url_mappings_2: list[URLMapping] = await ddc.create_urls( status=URLStatus.ERROR, @@ -72,7 +67,7 @@ async def test_get_backlog_metrics(api_test_helper): await ddc.create_batch_url_links(url_ids=url_ids_3, batch_id=batch_3_id) await ddc.create_validated_flags( url_ids=url_ids_3[:5], - validation_type=URLValidatedType.DATA_SOURCE + validation_type=URLType.DATA_SOURCE ) diff --git a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py index 08c52845..92dcba16 100644 --- a/tests/automated/integration/api/metrics/urls/aggregated/test_core.py +++ b/tests/automated/integration/api/metrics/urls/aggregated/test_core.py @@ -5,7 +5,7 @@ from src.collectors.enums import CollectorType, URLStatus from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.flag.url_validated.enums 
import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.enums import URLCreationEnum from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters @@ -49,8 +49,8 @@ async def test_get_urls_aggregated_metrics(api_test_helper): ) url_mappings_2_ok: list[URLMapping] = await ddc.create_urls(batch_id=batch_2, count=4, status=URLStatus.OK) url_mappings_2_error: list[URLMapping] = await ddc.create_urls(batch_id=batch_2, count=2, status=URLStatus.ERROR) - url_mappings_2_validated: list[URLMapping] = await ddc.create_validated_urls(count=1, validation_type=URLValidatedType.DATA_SOURCE) - url_mappings_2_not_relevant: list[URLMapping] = await ddc.create_validated_urls(count=5, validation_type=URLValidatedType.NOT_RELEVANT) + url_mappings_2_validated: list[URLMapping] = await ddc.create_validated_urls(count=1, validation_type=URLType.DATA_SOURCE) + url_mappings_2_not_relevant: list[URLMapping] = await ddc.create_validated_urls(count=5, validation_type=URLType.NOT_RELEVANT) url_ids_2_validated: list[int] = [url_mapping.url_id for url_mapping in url_mappings_2_validated] url_ids_2_not_relevant: list[int] = [url_mapping.url_id for url_mapping in url_mappings_2_not_relevant] await ddc.create_batch_url_links( diff --git a/tests/automated/integration/api/metrics/urls/aggregated/test_pending.py b/tests/automated/integration/api/metrics/urls/aggregated/test_pending.py index 1b55f04d..fee6ef46 100644 --- a/tests/automated/integration/api/metrics/urls/aggregated/test_pending.py +++ b/tests/automated/integration/api/metrics/urls/aggregated/test_pending.py @@ -1,7 +1,8 @@ import pytest from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.core.enums import SuggestedStatus, RecordType +from src.core.enums import RecordType +from 
src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.url_creation_parameters import TestURLCreationParameters @@ -26,19 +27,19 @@ async def setup_test_batches(db_data_creator): batches = [ create_batch( annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.NOT_RELEVANT + user_relevant=URLType.DATA_SOURCE ) ), create_batch( annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.RELEVANT, + user_relevant=URLType.DATA_SOURCE, user_record_type=RecordType.ARREST_RECORDS ), count=2 ), create_batch( annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.RELEVANT, + user_relevant=URLType.DATA_SOURCE, user_record_type=RecordType.CALLS_FOR_SERVICE, user_agency=URLAgencyAnnotationPostInfo( suggested_agency=await db_data_creator.agency() @@ -59,7 +60,7 @@ async def setup_test_batches(db_data_creator): ), create_batch( annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.NOT_RELEVANT, + user_relevant=URLType.DATA_SOURCE, user_record_type=RecordType.PERSONNEL_RECORDS, user_agency=URLAgencyAnnotationPostInfo( suggested_agency=await db_data_creator.agency() @@ -69,7 +70,7 @@ async def setup_test_batches(db_data_creator): ), create_batch( annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.RELEVANT, + user_relevant=URLType.DATA_SOURCE, user_agency=URLAgencyAnnotationPostInfo( is_new=True ) diff --git a/tests/automated/integration/api/metrics/urls/breakdown/test_pending.py b/tests/automated/integration/api/metrics/urls/breakdown/test_pending.py index 02f1aae2..3e906a8c 100644 --- a/tests/automated/integration/api/metrics/urls/breakdown/test_pending.py +++ b/tests/automated/integration/api/metrics/urls/breakdown/test_pending.py @@ -2,8 +2,9 @@ import pytest from src.api.endpoints.annotate.agency.post.dto import 
URLAgencyAnnotationPostInfo -from src.collectors.enums import CollectorType, URLStatus -from src.core.enums import SuggestedStatus, RecordType +from src.collectors.enums import CollectorType +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.enums import URLCreationEnum @@ -30,7 +31,7 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): count=1, status=URLCreationEnum.OK, annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.NOT_RELEVANT + user_relevant=URLType.NOT_RELEVANT ) ), TestURLCreationParameters( @@ -47,7 +48,7 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): count=3, status=URLCreationEnum.OK, annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.RELEVANT, + user_relevant=URLType.DATA_SOURCE, user_record_type=RecordType.CALLS_FOR_SERVICE ) ) @@ -71,7 +72,7 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): count=5, status=URLCreationEnum.OK, annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.RELEVANT, + user_relevant=URLType.DATA_SOURCE, user_record_type=RecordType.INCARCERATION_RECORDS, user_agency=URLAgencyAnnotationPostInfo( suggested_agency=agency_id diff --git a/tests/automated/integration/api/review/conftest.py b/tests/automated/integration/api/review/conftest.py index 59d76930..198bef59 100644 --- a/tests/automated/integration/api/review/conftest.py +++ b/tests/automated/integration/api/review/conftest.py @@ -1,8 +1,8 @@ import pytest_asyncio from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.collectors.enums import URLStatus -from src.core.enums import SuggestedStatus, RecordType +from src.core.enums import RecordType +from 
src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.annotation_info import AnnotationInfo from tests.helpers.batch_creation_parameters.core import TestBatchCreationParameters from tests.helpers.batch_creation_parameters.enums import URLCreationEnum @@ -18,7 +18,7 @@ async def batch_url_creation_info(db_data_creator): count=2, status=URLCreationEnum.OK, annotation_info=AnnotationInfo( - user_relevant=SuggestedStatus.RELEVANT, + user_relevant=URLType.DATA_SOURCE, user_record_type=RecordType.ARREST_RECORDS, user_agency=URLAgencyAnnotationPostInfo( suggested_agency=await db_data_creator.agency() diff --git a/tests/automated/integration/api/review/rejection/test_individual_record.py b/tests/automated/integration/api/review/rejection/test_individual_record.py index 33addd91..fd1b8231 100644 --- a/tests/automated/integration/api/review/rejection/test_individual_record.py +++ b/tests/automated/integration/api/review/rejection/test_individual_record.py @@ -2,7 +2,7 @@ from src.api.endpoints.review.enums import RejectionReason from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from tests.automated.integration.api.review.rejection.helpers import run_rejection_test from tests.helpers.api_test_helper import APITestHelper @@ -18,5 +18,5 @@ async def test_rejection_individual_record(api_test_helper: APITestHelper): # Get FlagURLValidated and confirm Individual Record flag: FlagURLValidated = (await api_test_helper.adb_client().get_all(FlagURLValidated))[0] - assert flag.type == URLValidatedType.INDIVIDUAL_RECORD + assert flag.type == URLType.INDIVIDUAL_RECORD diff --git a/tests/automated/integration/api/review/rejection/test_not_relevant.py b/tests/automated/integration/api/review/rejection/test_not_relevant.py index 
03ee72d3..2cb95704 100644 --- a/tests/automated/integration/api/review/rejection/test_not_relevant.py +++ b/tests/automated/integration/api/review/rejection/test_not_relevant.py @@ -2,7 +2,7 @@ from src.api.endpoints.review.enums import RejectionReason from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from tests.automated.integration.api.review.rejection.helpers import run_rejection_test @@ -17,4 +17,4 @@ async def test_rejection_not_relevant(api_test_helper): # Get FlagURLValidated and confirm Not Relevant flag: FlagURLValidated = (await api_test_helper.adb_client().get_all(FlagURLValidated))[0] - assert flag.type == URLValidatedType.NOT_RELEVANT \ No newline at end of file + assert flag.type == URLType.NOT_RELEVANT \ No newline at end of file diff --git a/tests/automated/integration/api/review/test_approve_and_get_next_source.py b/tests/automated/integration/api/review/test_approve_and_get_next_source.py index 69cf13d2..c9478111 100644 --- a/tests/automated/integration/api/review/test_approve_and_get_next_source.py +++ b/tests/automated/integration/api/review/test_approve_and_get_next_source.py @@ -6,7 +6,7 @@ from src.core.enums import RecordType from src.db.constants import PLACEHOLDER_AGENCY_NAME from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -82,4 +82,4 @@ async def test_approve_and_get_next_source_for_review(api_test_helper): # Confirm presence of FlagURLValidated flag_url_validated = await 
adb_client.get_all(FlagURLValidated) assert len(flag_url_validated) == 1 - assert flag_url_validated[0].type == URLValidatedType.DATA_SOURCE \ No newline at end of file + assert flag_url_validated[0].type == URLType.DATA_SOURCE \ No newline at end of file diff --git a/tests/automated/integration/api/review/test_next_source.py b/tests/automated/integration/api/review/test_next_source.py index 790914ee..47b9d710 100644 --- a/tests/automated/integration/api/review/test_next_source.py +++ b/tests/automated/integration/api/review/test_next_source.py @@ -1,6 +1,7 @@ import pytest -from src.core.enums import SuggestedStatus, RecordType +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @@ -42,11 +43,11 @@ async def test_review_next_source(api_test_helper): annotation_info = result.annotations relevant_info = annotation_info.relevant assert relevant_info.auto.is_relevant == True - assert relevant_info.user == SuggestedStatus.NOT_RELEVANT + assert relevant_info.user == {URLType.NOT_RELEVANT: 1} record_type_info = annotation_info.record_type assert record_type_info.auto == RecordType.ARREST_RECORDS - assert record_type_info.user == RecordType.ACCIDENT_REPORTS + assert record_type_info.user == {RecordType.ACCIDENT_REPORTS: 1} agency_info = annotation_info.agency auto_agency_suggestions = agency_info.auto @@ -55,7 +56,8 @@ async def test_review_next_source(api_test_helper): # Check user agency suggestions exist and in descending order of count user_agency_suggestion = agency_info.user - assert user_agency_suggestion.pdap_agency_id == setup_info.user_agency_id + assert user_agency_suggestion[0].suggestion.pdap_agency_id == setup_info.user_agency_id + assert user_agency_suggestion[0].user_count == 1 # Check confirmed agencies exist diff --git a/tests/automated/integration/db/client/annotate_url/test_marked_not_relevant.py 
b/tests/automated/integration/db/client/annotate_url/test_marked_not_relevant.py deleted file mode 100644 index 1653da61..00000000 --- a/tests/automated/integration/db/client/annotate_url/test_marked_not_relevant.py +++ /dev/null @@ -1,66 +0,0 @@ -import pytest - -from src.core.enums import SuggestedStatus -from src.db.dtos.url.mapping import URLMapping -from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_annotate_url_marked_not_relevant(db_data_creator: DBDataCreator): - """ - If a URL is marked not relevant by the user, they should not receive that URL - in calls to get an annotation for record type or agency - Other users should still receive the URL - """ - setup_info = await setup_for_get_next_url_for_annotation( - db_data_creator=db_data_creator, - url_count=2 - ) - adb_client = db_data_creator.adb_client - url_to_mark_not_relevant: URLMapping = setup_info.insert_urls_info.url_mappings[0] - url_to_mark_relevant: URLMapping = setup_info.insert_urls_info.url_mappings[1] - for url_mapping in setup_info.insert_urls_info.url_mappings: - await db_data_creator.agency_auto_suggestions( - url_id=url_mapping.url_id, - count=3 - ) - await adb_client.add_user_relevant_suggestion( - user_id=1, - url_id=url_to_mark_not_relevant.url_id, - suggested_status=SuggestedStatus.NOT_RELEVANT - ) - await adb_client.add_user_relevant_suggestion( - user_id=1, - url_id=url_to_mark_relevant.url_id, - suggested_status=SuggestedStatus.RELEVANT - ) - - # User should not receive the URL for record type annotation - record_type_annotation_info = await adb_client.get_next_url_for_record_type_annotation( - user_id=1, - batch_id=None - ) - assert record_type_annotation_info.url_info.url_id != url_to_mark_not_relevant.url_id - - # Other users also should not receive the URL for record type annotation - record_type_annotation_info = await 
adb_client.get_next_url_for_record_type_annotation( - user_id=2, - batch_id=None - ) - assert record_type_annotation_info.url_info.url_id != \ - url_to_mark_not_relevant.url_id, "Other users should not receive the URL for record type annotation" - - # User should not receive the URL for agency annotation - agency_annotation_info_user_1 = await adb_client.get_next_url_agency_for_annotation( - user_id=1, - batch_id=None - ) - assert agency_annotation_info_user_1.next_annotation.url_info.url_id != url_to_mark_not_relevant.url_id - - # Other users also should not receive the URL for agency annotation - agency_annotation_info_user_2 = await adb_client.get_next_url_agency_for_annotation( - user_id=2, - batch_id=None - ) - assert agency_annotation_info_user_1.next_annotation.url_info.url_id != url_to_mark_not_relevant.url_id diff --git a/tests/automated/integration/db/client/get_next_url_for_final_review/test_basic.py b/tests/automated/integration/db/client/get_next_url_for_final_review/test_basic.py index 3f5c3182..0d461f23 100644 --- a/tests/automated/integration/db/client/get_next_url_for_final_review/test_basic.py +++ b/tests/automated/integration/db/client/get_next_url_for_final_review/test_basic.py @@ -1,8 +1,9 @@ import pytest -from src.core.enums import SuggestedStatus, RecordType -from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review @pytest.mark.asyncio @@ -38,11 +39,11 @@ async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreato annotation_info = result.annotations relevant_info = annotation_info.relevant assert relevant_info.auto.is_relevant == True - assert relevant_info.user == SuggestedStatus.NOT_RELEVANT + assert relevant_info.user == 
{URLType.NOT_RELEVANT: 1} record_type_info = annotation_info.record_type assert record_type_info.auto == RecordType.ARREST_RECORDS - assert record_type_info.user == RecordType.ACCIDENT_REPORTS + assert record_type_info.user == {RecordType.ACCIDENT_REPORTS: 1} agency_info = annotation_info.agency auto_agency_suggestions = agency_info.auto @@ -50,4 +51,4 @@ async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreato assert len(auto_agency_suggestions.suggestions) == 3 # Check user agency suggestion exists and is correct - assert agency_info.user.pdap_agency_id == setup_info.user_agency_id + assert agency_info.user[0].suggestion.pdap_agency_id == setup_info.user_agency_id diff --git a/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/__init__.py b/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_pending.py b/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_pending.py deleted file mode 100644 index 9c452f15..00000000 --- a/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_pending.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest - -from src.core.enums import SuggestedStatus -from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_user_relevance_annotation_pending( - db_data_creator: DBDataCreator -): - """ - Users should receive a valid URL to annotate - All users should receive the same next URL - Once any user annotates that URL, none of the users should receive it again - """ - setup_info = await setup_for_get_next_url_for_annotation( - db_data_creator=db_data_creator, - url_count=2 - ) - - url_1 = 
setup_info.insert_urls_info.url_mappings[0] - - # Add `Relevancy` attribute with value `True` - await db_data_creator.auto_relevant_suggestions( - url_id=url_1.url_id, - relevant=True - ) - - adb_client = db_data_creator.adb_client - url_1 = await adb_client.get_next_url_for_relevance_annotation( - user_id=1, - batch_id=None - ) - assert url_1 is not None - - url_2 = await adb_client.get_next_url_for_relevance_annotation( - user_id=2, - batch_id=None - ) - assert url_2 is not None - - assert url_1.url_info.url == url_2.url_info.url - - # Annotate this URL, then check that the second URL is returned - await adb_client.add_user_relevant_suggestion( - url_id=url_1.url_info.url_id, - user_id=1, - suggested_status=SuggestedStatus.RELEVANT - ) - - url_3 = await adb_client.get_next_url_for_relevance_annotation( - user_id=1, - batch_id=None - ) - assert url_3 is not None - - assert url_1 != url_3 - - # Check that the second URL is also returned for another user - url_4 = await adb_client.get_next_url_for_relevance_annotation( - user_id=2, - batch_id=None - ) - assert url_4 is not None - - - assert url_4 == url_3 diff --git a/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_validated.py b/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_validated.py deleted file mode 100644 index ab5acd59..00000000 --- a/tests/automated/integration/db/client/get_next_url_for_user_relevance_annotation/test_validated.py +++ /dev/null @@ -1,30 +0,0 @@ -import pytest - -from src.collectors.enums import URLStatus -from tests.helpers.batch_creation_parameters.enums import URLCreationEnum -from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_user_relevance_annotation_validated( - db_data_creator: DBDataCreator -): - """ - A validated URL should not turn up in 
get_next_url_for_user_annotation - """ - dbdc = db_data_creator - url_1: int = (await dbdc.create_validated_urls())[0].url_id - - # Add `Relevancy` attribute with value `True` - await db_data_creator.auto_relevant_suggestions( - url_id=url_1, - relevant=True - ) - - adb_client = db_data_creator.adb_client - url = await adb_client.get_next_url_for_relevance_annotation( - user_id=1, - batch_id=None - ) - assert url is None diff --git a/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py b/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py index ab7e6cde..86d4a3ee 100644 --- a/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py +++ b/tests/automated/integration/db/client/test_get_next_url_for_annotation_batch_filtering.py @@ -1,5 +1,6 @@ import pytest +from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.core.enums import SuggestionType from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation from tests.helpers.data_creator.core import DBDataCreator @@ -31,38 +32,38 @@ def assert_batch_info(batch_info): # Test for relevance # If a batch id is provided, return first valid URL with that batch id - result_with_batch_id = await db_data_creator.adb_client.get_next_url_for_relevance_annotation( + result_with_batch_id: GetNextURLForAllAnnotationResponse = await db_data_creator.adb_client.get_next_url_for_all_annotations( user_id=1, batch_id=setup_info_2.batch_id ) - assert result_with_batch_id.url_info.url == url_2.url - assert_batch_info(result_with_batch_id.batch_info) + assert result_with_batch_id.next_annotation.url_info.url == url_2.url + assert_batch_info(result_with_batch_id.next_annotation.batch_info) # If no batch id is provided, return first valid URL - result_no_batch_id = await db_data_creator.adb_client.get_next_url_for_relevance_annotation( + result_no_batch_id: 
GetNextURLForAllAnnotationResponse = await db_data_creator.adb_client.get_next_url_for_all_annotations( user_id=1, batch_id=None ) - assert result_no_batch_id.url_info.url == url_1.url + assert result_no_batch_id.next_annotation.url_info.url == url_1.url # Test for record type # If a batch id is provided, return first valid URL with that batch id - result_with_batch_id = await db_data_creator.adb_client.get_next_url_for_record_type_annotation( + result_with_batch_id: GetNextURLForAllAnnotationResponse = await db_data_creator.adb_client.get_next_url_for_all_annotations( user_id=1, batch_id=setup_info_2.batch_id ) - assert result_with_batch_id.url_info.url == url_2.url - assert_batch_info(result_with_batch_id.batch_info) + assert result_with_batch_id.next_annotation.url_info.url == url_2.url + assert_batch_info(result_with_batch_id.next_annotation.batch_info) # If no batch id is provided, return first valid URL - result_no_batch_id = await db_data_creator.adb_client.get_next_url_for_record_type_annotation( + result_no_batch_id: GetNextURLForAllAnnotationResponse = await db_data_creator.adb_client.get_next_url_for_all_annotations( user_id=1, batch_id=None ) - assert result_no_batch_id.url_info.url == url_1.url + assert result_no_batch_id.next_annotation.url_info.url == url_1.url # Test for agency for url in [url_1, url_2]: @@ -73,7 +74,7 @@ def assert_batch_info(batch_info): ) # If a batch id is provided, return first valid URL with that batch id - result_with_batch_id = await db_data_creator.adb_client.get_next_url_agency_for_annotation( + result_with_batch_id: GetNextURLForAllAnnotationResponse = await db_data_creator.adb_client.get_next_url_for_all_annotations( user_id=1, batch_id=setup_info_2.batch_id ) @@ -82,7 +83,7 @@ def assert_batch_info(batch_info): assert_batch_info(result_with_batch_id.next_annotation.batch_info) # If no batch id is provided, return first valid URL - result_no_batch_id = await db_data_creator.adb_client.get_next_url_agency_for_annotation( 
+ result_no_batch_id: GetNextURLForAllAnnotationResponse = await db_data_creator.adb_client.get_next_url_for_all_annotations( user_id=1, batch_id=None ) @@ -91,7 +92,7 @@ def assert_batch_info(batch_info): # All annotations - result_with_batch_id = await db_data_creator.adb_client.get_next_url_for_all_annotations( + result_with_batch_id: GetNextURLForAllAnnotationResponse = await db_data_creator.adb_client.get_next_url_for_all_annotations( batch_id=setup_info_2.batch_id, user_id=1 ) @@ -100,7 +101,7 @@ def assert_batch_info(batch_info): assert_batch_info(result_with_batch_id.next_annotation.batch_info) # If no batch id is provided, return first valid URL - result_no_batch_id = await db_data_creator.adb_client.get_next_url_for_all_annotations( + result_no_batch_id: GetNextURLForAllAnnotationResponse = await db_data_creator.adb_client.get_next_url_for_all_annotations( batch_id=None, user_id=1 ) diff --git a/tests/automated/integration/db/client/test_get_next_url_for_user_agency_annotation.py b/tests/automated/integration/db/client/test_get_next_url_for_user_agency_annotation.py deleted file mode 100644 index 707399c9..00000000 --- a/tests/automated/integration/db/client/test_get_next_url_for_user_agency_annotation.py +++ /dev/null @@ -1,61 +0,0 @@ -import pytest - -from tests.helpers.setup.annotate_agency.core import setup_for_annotate_agency -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_user_agency_annotation(db_data_creator: DBDataCreator): - """ - All users should receive the same next valid URL for agency annotation - Once any user annotates that URL, none of the users should receive it - """ - setup_info = await setup_for_annotate_agency( - db_data_creator, - url_count=2 - ) - - # All users should receive the same URL - url_1 = setup_info.url_ids[0] - url_2 = setup_info.url_ids[1] - - adb_client = db_data_creator.adb_client - url_user_1 = await adb_client.get_next_url_agency_for_annotation( 
- user_id=1, - batch_id=None - ) - assert url_user_1 is not None - - url_user_2 = await adb_client.get_next_url_agency_for_annotation( - user_id=2, - batch_id=None - ) - - assert url_user_2 is not None - - # Check that the URLs are the same - assert url_user_1 == url_user_2 - - # Annotate the URL - await adb_client.add_agency_manual_suggestion( - url_id=url_1, - user_id=1, - is_new=True, - agency_id=None - ) - - # Both users should receive the next URL - next_url_user_1 = await adb_client.get_next_url_agency_for_annotation( - user_id=1, - batch_id=None - ) - assert next_url_user_1 is not None - - next_url_user_2 = await adb_client.get_next_url_agency_for_annotation( - user_id=2, - batch_id=None - ) - assert next_url_user_2 is not None - - assert url_user_1 != next_url_user_1 - assert next_url_user_1 == next_url_user_2 diff --git a/tests/automated/integration/db/client/test_get_next_url_for_user_record_type_annotation.py b/tests/automated/integration/db/client/test_get_next_url_for_user_record_type_annotation.py deleted file mode 100644 index 203cb710..00000000 --- a/tests/automated/integration/db/client/test_get_next_url_for_user_record_type_annotation.py +++ /dev/null @@ -1,59 +0,0 @@ -import pytest - -from src.core.enums import RecordType -from tests.helpers.setup.annotation.core import setup_for_get_next_url_for_annotation -from tests.helpers.data_creator.core import DBDataCreator - - -@pytest.mark.asyncio -async def test_get_next_url_for_user_record_type_annotation(db_data_creator: DBDataCreator): - """ - All users should receive the same next valid URL for record type annotation - Once any user annotates that URL, none of the users should receive it - """ - setup_info = await setup_for_get_next_url_for_annotation( - db_data_creator, - url_count=2 - ) - - # All users should receive the same URL - url_1 = setup_info.insert_urls_info.url_mappings[0] - url_2 = setup_info.insert_urls_info.url_mappings[1] - - adb_client = db_data_creator.adb_client - - url_user_1 = 
await adb_client.get_next_url_for_record_type_annotation( - user_id=1, - batch_id=None - ) - assert url_user_1 is not None - - url_user_2 = await adb_client.get_next_url_for_record_type_annotation( - user_id=2, - batch_id=None - ) - - assert url_user_2 is not None - - # Check that the URLs are the same - assert url_user_1 == url_user_2 - - # After annotating, both users should receive a different URL - await adb_client.add_user_record_type_suggestion( - user_id=1, - url_id=url_1.url_id, - record_type=RecordType.ARREST_RECORDS - ) - - next_url_user_1 = await adb_client.get_next_url_for_record_type_annotation( - user_id=1, - batch_id=None - ) - - next_url_user_2 = await adb_client.get_next_url_for_record_type_annotation( - user_id=2, - batch_id=None - ) - - assert next_url_user_1 != url_user_1 - assert next_url_user_1 == next_url_user_2 diff --git a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/convert.py b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/convert.py index 2fb5b2d0..ed17cb36 100644 --- a/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/convert.py +++ b/tests/automated/integration/tasks/scheduled/impl/huggingface/setup/queries/convert.py @@ -1,14 +1,14 @@ -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.automated.integration.tasks.scheduled.impl.huggingface.setup.enums import \ PushToHuggingFaceTestSetupStatusEnum def convert_test_status_to_validated_status( status: PushToHuggingFaceTestSetupStatusEnum -) -> URLValidatedType: +) -> URLType: match status: case PushToHuggingFaceTestSetupStatusEnum.DATA_SOURCE: - return URLValidatedType.DATA_SOURCE + return URLType.DATA_SOURCE case PushToHuggingFaceTestSetupStatusEnum.NOT_RELEVANT: - return URLValidatedType.NOT_RELEVANT + return URLType.NOT_RELEVANT case _: raise ValueError(f"Invalid test status for function: {status}") \ No 
newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/agency/setup/core.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/setup/core.py index cb84b014..0712d251 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/agency/setup/core.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/setup/core.py @@ -3,7 +3,7 @@ from unittest.mock import patch, AsyncMock from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.external.pdap.client import PDAPClient from src.external.pdap.dtos.sync.agencies import AgenciesSyncResponseInfo, AgenciesSyncResponseInnerInfo from tests.helpers.data_creator.core import DBDataCreator @@ -26,7 +26,7 @@ def set_up_mock_pdap_client_responses( async def set_up_urls( db_data_creator: DBDataCreator, record_type: RecordType, - validated_type: URLValidatedType | None = None, + validated_type: URLType | None = None, agency_ids: list[int] | None = None, ) -> list[int]: """Create 2 Test URLs in database.""" diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_ds_url_in_db_not_sync.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_ds_url_in_db_not_sync.py index 42384615..8cc57cf5 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_ds_url_in_db_not_sync.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_ds_url_in_db_not_sync.py @@ -6,7 +6,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping import URLMapping from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from 
src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -45,7 +45,7 @@ async def test_data_sources_url_in_db_not_meta_url_sync( # Create additional URL Validated as data source and link to agency ds_url_mapping: URLMapping = (await db_data_creator.create_validated_urls( - validation_type=URLValidatedType.DATA_SOURCE, + validation_type=URLType.DATA_SOURCE, record_type=RecordType.ACCIDENT_REPORTS ))[0] ds_url_id: int = ds_url_mapping.url_id @@ -83,8 +83,8 @@ async def test_data_sources_url_in_db_not_meta_url_sync( flags: list[FlagURLValidated] = await db_client.get_all(FlagURLValidated) assert len(flags) == 2 assert set(flag.type for flag in flags) == { - URLValidatedType.META_URL, - URLValidatedType.DATA_SOURCE + URLType.META_URL, + URLType.DATA_SOURCE } assert set(flag.url_id for flag in flags) == set(url.id for url in urls) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_meta_url_in_db_not_sync.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_meta_url_in_db_not_sync.py index 9db57ec7..5fe62211 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_meta_url_in_db_not_sync.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_meta_url_in_db_not_sync.py @@ -6,7 +6,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping import URLMapping from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -34,7 +34,7 @@ async def test_meta_url_in_db_not_sync( agency_id: int = 1 await db_data_creator.create_agency(agency_id) meta_url_mapping: 
URLMapping = (await db_data_creator.create_validated_urls( - validation_type=URLValidatedType.META_URL, + validation_type=URLType.META_URL, record_type=RecordType.CONTACT_INFO_AND_AGENCY_META ))[0] meta_url_id: int = meta_url_mapping.url_id @@ -71,7 +71,7 @@ async def test_meta_url_in_db_not_sync( # Confirm 1 Validated Flag flags: list[FlagURLValidated] = await db_client.get_all(FlagURLValidated) assert len(flags) == 1 - assert all(flag.type == URLValidatedType.META_URL for flag in flags) + assert all(flag.type == URLType.META_URL for flag in flags) assert all(flag.url_id == meta_url_id for flag in flags) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_same_meta_url_diff_agency.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_same_meta_url_diff_agency.py index 9a0e920b..5e63a79d 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_same_meta_url_diff_agency.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_same_meta_url_diff_agency.py @@ -6,7 +6,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping import URLMapping from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -35,7 +35,7 @@ async def test_same_meta_url_diff_agency( await db_data_creator.create_agency(existing_agency_id) meta_url_mapping: URLMapping = (await db_data_creator.create_validated_urls( - validation_type=URLValidatedType.META_URL, + validation_type=URLType.META_URL, record_type=RecordType.CONTACT_INFO_AND_AGENCY_META ))[0] meta_url_id: int = meta_url_mapping.url_id @@ -73,5 +73,5 @@ async def 
test_same_meta_url_diff_agency( # Confirm 2 Validated Flag flags: list[FlagURLValidated] = await db_client.get_all(FlagURLValidated) assert len(flags) == 1 - assert all(flag.type == URLValidatedType.META_URL for flag in flags) + assert all(flag.type == URLType.META_URL for flag in flags) assert all(flag.url_id == meta_url_id for flag in flags) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_with_meta_url_not_in_database.py b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_with_meta_url_not_in_database.py index 13a8eb20..247a2ba0 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_with_meta_url_not_in_database.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/agency/test_with_meta_url_not_in_database.py @@ -5,7 +5,7 @@ from src.core.tasks.scheduled.impl.sync.agency.operator import SyncAgenciesTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -63,5 +63,5 @@ async def test_with_meta_url_not_in_database( # Confirm 2 Validated Flags flags: list[FlagURLValidated] = await db_client.get_all(FlagURLValidated) assert len(flags) == 2 - assert all(flag.type == URLValidatedType.META_URL for flag in flags) + assert all(flag.type == URLType.META_URL for flag in flags) assert set(flag.url_id for flag in flags) == set(url.id for url in urls) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/core.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/core.py index f7cd3337..847add04 100644 --- 
a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/core.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/core.py @@ -5,7 +5,7 @@ from src.collectors.enums import URLStatus from src.core.enums import RecordType from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.external.pdap.client import PDAPClient from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInfo, DataSourcesSyncResponseInnerInfo from src.external.pdap.enums import ApprovalStatus, DataSourcesURLStatus @@ -41,7 +41,7 @@ def set_up_mock_pdap_client_responses( async def set_up_urls( adb_client: AsyncDatabaseClient, record_type: RecordType, - validated_type: URLValidatedType | None = None, + validated_type: URLType | None = None, previously_synced: bool = False, ) -> list[int]: """Creates 2 test URLs.""" diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/queries/url_/requester.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/queries/url_/requester.py index a514b151..58735685 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/queries/url_/requester.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/queries/url_/requester.py @@ -1,7 +1,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.insert import URLInsertModel @@ -32,7 +32,7 @@ async def insert_urls( async def insert_validated_flags( self, url_ids: 
list[int], - validated_type: URLValidatedType + validated_type: URLType ) -> None: to_insert: list[FlagURLValidatedPydantic] = [] for url_id in url_ids: diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/queries/url_/url.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/queries/url_/url.py index 0176a95f..f7ceae61 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/queries/url_/url.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/setup/queries/url_/url.py @@ -1,7 +1,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from src.core.enums import RecordType -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.queries.base.builder import QueryBuilderBase from tests.automated.integration.tasks.scheduled.impl.sync.data_sources.setup.queries.url_.requester import \ TestDataSourcesSyncURLSetupQueryRequester @@ -12,7 +12,7 @@ class TestDataSourcesSyncURLSetupQueryBuilder(QueryBuilderBase): def __init__( self, record_type: RecordType, - validated_type: URLValidatedType | None = None, + validated_type: URLType | None = None, previously_synced: bool = False, ): super().__init__() diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_db_only.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_db_only.py index 87cf163a..da243117 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_db_only.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_db_only.py @@ -8,7 +8,7 @@ from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters from src.db.client.async_ import AsyncDatabaseClient -from 
src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.external.pdap.dtos.sync.data_sources import DataSourcesSyncResponseInfo diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_meta_url_not_modified.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_meta_url_not_modified.py index 51d40d6f..2e5eab87 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_meta_url_not_modified.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_meta_url_not_modified.py @@ -5,7 +5,7 @@ from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -30,7 +30,7 @@ async def test_meta_url_not_modified( original_url_ids: list[int] = await set_up_urls( adb_client=adb_client_test, record_type=RecordType.CONTACT_INFO_AND_AGENCY_META, - validated_type=URLValidatedType.META_URL, + validated_type=URLType.META_URL, ) # Link URLs to existing agencies await db_data_creator.create_url_agency_links( @@ -81,8 +81,8 @@ async def test_meta_url_not_modified( flags: list[FlagURLValidated] = await adb_client_test.get_all(FlagURLValidated) assert len(flags) == 4 assert set([flag.type for flag in flags]) == { - URLValidatedType.META_URL, - URLValidatedType.DATA_SOURCE, + URLType.META_URL, 
+ URLType.DATA_SOURCE, } assert set(flag.url_id for flag in flags) == set(all_url_ids) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_broken_approved.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_broken_approved.py index 7878c83f..9a6bf120 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_broken_approved.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_broken_approved.py @@ -8,7 +8,7 @@ from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator from src.core.tasks.scheduled.impl.sync.data_sources.params import DataSourcesSyncParameters from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -72,7 +72,7 @@ async def test_url_broken_approved( # Confirm presence of validated flag flags: list[FlagURLValidated] = await adb_client_test.get_all(FlagURLValidated) assert len(flags) == 2 - assert all([flag.type == URLValidatedType.DATA_SOURCE for flag in flags]) + assert all([flag.type == URLType.DATA_SOURCE for flag in flags]) assert set(flag.url_id for flag in flags) == set(url_ids) # Confirm presence of sync status row diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_in_db_overwritten_by_ds.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_in_db_overwritten_by_ds.py index e1c7f33c..f305cee4 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_in_db_overwritten_by_ds.py +++ 
b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_in_db_overwritten_by_ds.py @@ -5,7 +5,7 @@ from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.sync.data_sources.operator import SyncDataSourcesTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL @@ -33,7 +33,7 @@ async def test_url_in_db_overwritten_by_ds( url_ids: list[int] = await set_up_urls( adb_client=adb_client_test, record_type=RecordType.COMPLAINTS_AND_MISCONDUCT, - validated_type=URLValidatedType.DATA_SOURCE, + validated_type=URLType.DATA_SOURCE, ) # Link URLs to 2 existing agencies links: list[LinkURLAgency] = [] @@ -89,6 +89,6 @@ async def test_url_in_db_overwritten_by_ds( # Confirm validated types overwritten flags: list[FlagURLValidated] = await adb_client_test.get_all(FlagURLValidated) assert len(flags) == 2 - assert all([flag.type == URLValidatedType.NOT_RELEVANT for flag in flags]) + assert all([flag.type == URLType.NOT_RELEVANT for flag in flags]) assert set(flag.url_id for flag in flags) == set(url_ids) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_ok_approved.py b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_ok_approved.py index eeff4028..157353ab 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_ok_approved.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync/data_sources/test_url_ok_approved.py @@ -5,7 +5,7 @@ from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.scheduled.impl.sync.data_sources.operator import 
SyncDataSourcesTaskOperator from src.db.client.async_ import AsyncDatabaseClient -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL from src.external.pdap.enums import ApprovalStatus, DataSourcesURLStatus @@ -59,5 +59,5 @@ async def test_url_ok_approved( # Confirm presence of validated flag flags: list[FlagURLValidated] = await adb_client_test.get_all(FlagURLValidated) assert len(flags) == 2 - assert all([flag.type == URLValidatedType.DATA_SOURCE for flag in flags]) + assert all([flag.type == URLType.DATA_SOURCE for flag in flags]) assert set(flag.url_id for flag in flags) == set(url_ids) diff --git a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/ineligible_cases/test_blacklist.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/ineligible_cases/test_blacklist.py index 05a9e2bb..2334aa17 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/ineligible_cases/test_blacklist.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/ineligible_cases/test_blacklist.py @@ -2,7 +2,7 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.core import DBDataCreator @@ -29,7 +29,7 @@ async def test_blacklist( # Create Meta URLs meta_urls: list[URLMapping] = await db_data_creator.create_validated_urls( count=3, - validation_type=URLValidatedType.META_URL + validation_type=URLType.META_URL ) # Create 3 agencies diff --git 
a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py index 43a1677c..10e3f711 100644 --- a/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py +++ b/tests/automated/integration/tasks/url/impl/agency_identification/subtasks/homepage_match/test_happy_path.py @@ -6,7 +6,7 @@ from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping import URLMapping -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion @@ -46,7 +46,7 @@ async def test_homepage_match( # Create 1 Meta URL for single agency case single_meta_url_id: int = (await db_data_creator.create_validated_urls( count=1, - validation_type=URLValidatedType.META_URL + validation_type=URLType.META_URL ))[0].url_id # Link single meta URL to single agency await db_data_creator.create_url_agency_links( @@ -62,7 +62,7 @@ async def test_homepage_match( # Create 2 Meta URLs and agencies for multi agency case multi_meta_urls: list[URLMapping] = await db_data_creator.create_validated_urls( count=2, - validation_type=URLValidatedType.META_URL + validation_type=URLType.META_URL ) multi_meta_url_ids: list[int] = [url_mapping.url_id for url_mapping in multi_meta_urls] # Link multi meta URLs to agencies diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py 
b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py index e788fff1..85dd71f5 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_error.py @@ -1,7 +1,7 @@ import pytest from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import TestURLProbeCheckManager @@ -33,7 +33,7 @@ async def test_url_probe_task_error( ) assert not await operator.meets_task_prerequisites() url_id: int = await setup_manager.setup_url(URLStatus.OK) - await db_data_creator.create_validated_flags([url_id], validation_type=URLValidatedType.DATA_SOURCE) + await db_data_creator.create_validated_flags([url_id], validation_type=URLType.DATA_SOURCE) await db_data_creator.create_url_data_sources([url_id]) assert await operator.meets_task_prerequisites() diff --git a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py index 7fc54da4..31216e23 100644 --- a/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py +++ b/tests/automated/integration/tasks/url/impl/probe/no_redirect/test_not_found.py @@ -1,7 +1,7 @@ import pytest from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.automated.integration.tasks.url.impl.asserts import assert_task_ran_without_error from tests.automated.integration.tasks.url.impl.probe.check.manager import 
TestURLProbeCheckManager from tests.automated.integration.tasks.url.impl.probe.setup.manager import TestURLProbeSetupManager @@ -33,7 +33,7 @@ async def test_url_probe_task_not_found( ) assert not await operator.meets_task_prerequisites() url_id = await setup_manager.setup_url(URLStatus.OK) - await db_data_creator.create_validated_flags([url_id], validation_type=URLValidatedType.NOT_RELEVANT) + await db_data_creator.create_validated_flags([url_id], validation_type=URLType.NOT_RELEVANT) assert await operator.meets_task_prerequisites() run_info = await operator.run_task() assert_task_ran_without_error(run_info) diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py b/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py index 5f927159..d9b5a380 100644 --- a/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py +++ b/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py @@ -2,7 +2,7 @@ from src.core.tasks.base.run_info import TaskOperatorRunInfo from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource from src.external.pdap.client import PDAPClient from tests.helpers.asserts import assert_task_run_success @@ -27,7 +27,7 @@ async def test_validated_meta_url_not_included( dbdc = db_data_creator url_1: int = (await dbdc.create_validated_urls( - validation_type=URLValidatedType.META_URL + validation_type=URLType.META_URL ))[0].url_id # Test task operator does not meet prerequisites diff --git a/tests/automated/integration/api/annotate/relevancy/__init__.py b/tests/automated/unit/api/__init__.py similarity index 100% rename from tests/automated/integration/api/annotate/relevancy/__init__.py 
rename to tests/automated/unit/api/__init__.py diff --git a/tests/automated/unit/api/test_all_annotation_post_info.py b/tests/automated/unit/api/test_all_annotation_post_info.py new file mode 100644 index 00000000..549f6d79 --- /dev/null +++ b/tests/automated/unit/api/test_all_annotation_post_info.py @@ -0,0 +1,156 @@ +import pytest +from pydantic import BaseModel + +from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo +from src.core.enums import RecordType +from src.core.exceptions import FailedValidationException +from src.db.models.impl.flag.url_validated.enums import URLType + + +class TestAllAnnotationPostInfoParams(BaseModel): + suggested_status: URLType + record_type: RecordType | None + agency_ids: list[int] + location_ids: list[int] + raise_exception: bool + +@pytest.mark.parametrize( + "params", + [ + # Happy Paths + TestAllAnnotationPostInfoParams( + suggested_status=URLType.META_URL, + record_type=None, + agency_ids=[1, 2], + location_ids=[3,4], + raise_exception=False + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.DATA_SOURCE, + record_type=RecordType.ACCIDENT_REPORTS, + agency_ids=[1, 2], + location_ids=[3,4], + raise_exception=False + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.NOT_RELEVANT, + record_type=None, + agency_ids=[], + location_ids=[], + raise_exception=False + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.INDIVIDUAL_RECORD, + record_type=None, + agency_ids=[], + location_ids=[], + raise_exception=False + ), + # Error Paths - Meta URL + TestAllAnnotationPostInfoParams( + suggested_status=URLType.META_URL, + record_type=RecordType.ACCIDENT_REPORTS, # Record Type Included + agency_ids=[1, 2], + location_ids=[3, 4], + raise_exception=True + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.META_URL, + record_type=None, + agency_ids=[], # No agency IDs + location_ids=[3, 4], + raise_exception=True + ), + TestAllAnnotationPostInfoParams( + 
suggested_status=URLType.META_URL, + record_type=None, + agency_ids=[1, 2], + location_ids=[], # No Location IDs + raise_exception=True + ), + # Error Paths - Data Source + TestAllAnnotationPostInfoParams( + suggested_status=URLType.DATA_SOURCE, + record_type=None, # No record type + agency_ids=[1, 2], + location_ids=[3, 4], + raise_exception=True + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.DATA_SOURCE, + record_type=RecordType.ACCIDENT_REPORTS, + agency_ids=[], # No Agency IDs + location_ids=[3, 4], + raise_exception=True + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.DATA_SOURCE, + record_type=RecordType.ACCIDENT_REPORTS, + agency_ids=[1, 2], + location_ids=[], # No Location IDs + raise_exception=True + ), + # Error Paths - Not Relevant + TestAllAnnotationPostInfoParams( + suggested_status=URLType.NOT_RELEVANT, + record_type=RecordType.ACCIDENT_REPORTS, # Record Type Included + agency_ids=[], + location_ids=[], + raise_exception=True + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.NOT_RELEVANT, + record_type=None, + agency_ids=[1, 2], # Agency IDs Included + location_ids=[], + raise_exception=True + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.NOT_RELEVANT, + record_type=None, + agency_ids=[], + location_ids=[1, 2], # Location IDs included + raise_exception=True + ), + # Error Paths - Individual Record + TestAllAnnotationPostInfoParams( + suggested_status=URLType.INDIVIDUAL_RECORD, + record_type=RecordType.ACCIDENT_REPORTS, # Record Type Included + agency_ids=[], + location_ids=[], + raise_exception=True + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.INDIVIDUAL_RECORD, + record_type=None, + agency_ids=[1, 2], # Agency IDs Included + location_ids=[], + raise_exception=True + ), + TestAllAnnotationPostInfoParams( + suggested_status=URLType.INDIVIDUAL_RECORD, + record_type=None, + agency_ids=[], + location_ids=[1, 2], # Location IDs included + raise_exception=True + 
) + ] +) +def test_all_annotation_post_info( + params: TestAllAnnotationPostInfoParams +): + if params.raise_exception: + with pytest.raises(FailedValidationException): + AllAnnotationPostInfo( + suggested_status=params.suggested_status, + record_type=params.record_type, + agency_ids=params.agency_ids, + location_ids=params.location_ids + ) + else: + AllAnnotationPostInfo( + suggested_status=params.suggested_status, + record_type=params.record_type, + agency_ids=params.agency_ids, + location_ids=params.location_ids + ) \ No newline at end of file diff --git a/tests/helpers/batch_creation_parameters/annotation_info.py b/tests/helpers/batch_creation_parameters/annotation_info.py index f9c9ef2d..cef99f43 100644 --- a/tests/helpers/batch_creation_parameters/annotation_info.py +++ b/tests/helpers/batch_creation_parameters/annotation_info.py @@ -3,11 +3,12 @@ from pydantic import BaseModel from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.core.enums import SuggestedStatus, RecordType +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType class AnnotationInfo(BaseModel): - user_relevant: Optional[SuggestedStatus] = None + user_relevant: Optional[URLType] = None auto_relevant: Optional[bool] = None user_record_type: Optional[RecordType] = None auto_record_type: Optional[RecordType] = None diff --git a/tests/helpers/data_creator/commands/impl/suggestion/user/relevant.py b/tests/helpers/data_creator/commands/impl/suggestion/user/relevant.py index 9d4df2c3..0dfd5a3f 100644 --- a/tests/helpers/data_creator/commands/impl/suggestion/user/relevant.py +++ b/tests/helpers/data_creator/commands/impl/suggestion/user/relevant.py @@ -3,9 +3,10 @@ from typing_extensions import override -from src.core.enums import SuggestedStatus +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.commands.base import DBDataCreatorCommandBase + @final class 
UserRelevantSuggestionCommand(DBDataCreatorCommandBase): @@ -13,7 +14,7 @@ def __init__( self, url_id: int, user_id: int | None = None, - suggested_status: SuggestedStatus = SuggestedStatus.RELEVANT + suggested_status: URLType = URLType.DATA_SOURCE ): super().__init__() self.url_id = url_id diff --git a/tests/helpers/data_creator/commands/impl/urls_/convert.py b/tests/helpers/data_creator/commands/impl/urls_/convert.py index d76edfe5..bfefc7bd 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/convert.py +++ b/tests/helpers/data_creator/commands/impl/urls_/convert.py @@ -1,5 +1,5 @@ from src.collectors.enums import URLStatus -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.batch_creation_parameters.enums import URLCreationEnum @@ -24,13 +24,13 @@ def convert_url_creation_enum_to_url_status(url_creation_enum: URLCreationEnum) def convert_url_creation_enum_to_validated_type( url_creation_enum: URLCreationEnum -) -> URLValidatedType: +) -> URLType: match url_creation_enum: case URLCreationEnum.SUBMITTED: - return URLValidatedType.DATA_SOURCE + return URLType.DATA_SOURCE case URLCreationEnum.VALIDATED: - return URLValidatedType.DATA_SOURCE + return URLType.DATA_SOURCE case URLCreationEnum.NOT_RELEVANT: - return URLValidatedType.NOT_RELEVANT + return URLType.NOT_RELEVANT case _: raise ValueError(f"Unknown URLCreationEnum: {url_creation_enum}") \ No newline at end of file diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index bacddfd6..eb7ef3f7 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -3,24 +3,24 @@ from typing import Optional, Any from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo +from src.collectors.enums import CollectorType, URLStatus +from src.core.enums import BatchStatus, SuggestionType, RecordType from 
src.core.tasks.url.operators.agency_identification.dtos.suggestion import URLAgencySuggestionInfo +from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO from src.db.client.async_ import AsyncDatabaseClient +from src.db.client.sync import DatabaseClient +from src.db.dtos.url.insert import InsertURLsInfo from src.db.dtos.url.mapping import URLMapping +from src.db.enums import TaskType from src.db.models.impl.agency.sqlalchemy import Agency from src.db.models.impl.duplicate.pydantic.insert import DuplicateInsertInfo -from src.db.dtos.url.insert import InsertURLsInfo from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic -from src.db.client.sync import DatabaseClient -from src.db.enums import TaskType -from src.collectors.enums import CollectorType, URLStatus -from src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO -from src.core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask @@ -168,7 +168,7 @@ async def user_relevant_suggestion( self, url_id: int, user_id: int | None = None, - suggested_status: SuggestedStatus = SuggestedStatus.RELEVANT + suggested_status: URLType = URLType.DATA_SOURCE ) -> None: await 
self.run_command( UserRelevantSuggestionCommand( @@ -388,7 +388,7 @@ async def url_metadata( async def create_validated_urls( self, record_type: RecordType = RecordType.RESOURCES, - validation_type: URLValidatedType = URLValidatedType.DATA_SOURCE, + validation_type: URLType = URLType.DATA_SOURCE, count: int = 1 ) -> list[URLMapping]: url_mappings: list[URLMapping] = await self.create_urls( @@ -414,7 +414,7 @@ async def create_submitted_urls( url_ids: list[int] = [url_mapping.url_id for url_mapping in url_mappings] await self.create_validated_flags( url_ids=url_ids, - validation_type=URLValidatedType.DATA_SOURCE + validation_type=URLType.DATA_SOURCE ) await self.create_url_data_sources(url_ids=url_ids) return url_mappings @@ -473,7 +473,7 @@ async def create_batch_url_links( async def create_validated_flags( self, url_ids: list[int], - validation_type: URLValidatedType, + validation_type: URLType, ) -> None: await create_validated_flags( adb_client=self.adb_client, diff --git a/tests/helpers/data_creator/create.py b/tests/helpers/data_creator/create.py index 31c5c316..fb3c20ad 100644 --- a/tests/helpers/data_creator/create.py +++ b/tests/helpers/data_creator/create.py @@ -6,7 +6,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.dtos.url.mapping import URLMapping from src.db.models.impl.batch.pydantic.insert import BatchInsertModel -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic from src.db.models.impl.link.batch_url.pydantic import LinkBatchURLPydantic from src.db.models.impl.url.core.enums import URLSource @@ -50,7 +50,7 @@ async def create_urls( async def create_validated_flags( adb_client: AsyncDatabaseClient, url_ids: list[int], - validation_type: URLValidatedType, + validation_type: URLType, ) -> None: validated_flags: list[FlagURLValidatedPydantic] = 
generate_validated_flags( url_ids=url_ids, diff --git a/tests/helpers/data_creator/generate.py b/tests/helpers/data_creator/generate.py index 5dabc016..ad730a71 100644 --- a/tests/helpers/data_creator/generate.py +++ b/tests/helpers/data_creator/generate.py @@ -3,7 +3,7 @@ from src.collectors.enums import URLStatus, CollectorType from src.core.enums import BatchStatus, RecordType from src.db.models.impl.batch.pydantic.insert import BatchInsertModel -from src.db.models.impl.flag.url_validated.enums import URLValidatedType +from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.pydantic import FlagURLValidatedPydantic from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.batch_url.pydantic import LinkBatchURLPydantic @@ -60,7 +60,7 @@ def generate_urls( def generate_validated_flags( url_ids: list[int], - validation_type: URLValidatedType, + validation_type: URLType, ) -> list[FlagURLValidatedPydantic]: return [ FlagURLValidatedPydantic( diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py index 58b1ae49..b3841b37 100644 --- a/tests/helpers/setup/final_review/core.py +++ b/tests/helpers/setup/final_review/core.py @@ -1,7 +1,8 @@ from typing import Optional from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo -from src.core.enums import RecordType, SuggestedStatus +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.setup.final_review.model import FinalReviewSetupInfo @@ -46,7 +47,7 @@ async def add_record_type_suggestion(record_type: RecordType) -> None: async def add_relevant_suggestion(relevant: bool): await db_data_creator.user_relevant_suggestion( url_id=url_mapping.url_id, - suggested_status=SuggestedStatus.RELEVANT if relevant else SuggestedStatus.NOT_RELEVANT 
+ suggested_status=URLType.DATA_SOURCE if relevant else URLType.NOT_RELEVANT ) await db_data_creator.auto_relevant_suggestions(