diff --git a/alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py b/alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py new file mode 100644 index 00000000..775caddf --- /dev/null +++ b/alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py @@ -0,0 +1,75 @@ +"""Set user annotation tables to allow only one annotation per url + +Revision ID: 997f5bf53772 +Revises: ed06a5633d2e +Create Date: 2025-04-16 19:54:59.798580 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = '997f5bf53772' +down_revision: Union[str, None] = 'ed06a5633d2e' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Delete entries with more than one annotation + # Relevance + op.execute(""" + with ranked as( + SELECT + id, + ROW_NUMBER() OVER (PARTITION BY URL_ID ORDER BY id) as rn + FROM + USER_RELEVANT_SUGGESTIONS + ) + DELETE FROM user_relevant_suggestions + USING ranked + WHERE USER_RELEVANT_SUGGESTIONS.id = ranked.id + and ranked.rn > 1 + """) + # Record Type + op.execute(""" + with ranked as( + SELECT + id, + ROW_NUMBER() OVER (PARTITION BY URL_ID ORDER BY id) as rn + FROM + USER_RECORD_TYPE_SUGGESTIONS + ) + DELETE FROM user_record_type_suggestions + USING ranked + WHERE USER_RECORD_TYPE_SUGGESTIONS.id = ranked.id + and ranked.rn > 1 + """) + # Agency: dedupe before the unique constraint below can be created (assumes an id column like the tables above; confirm) + op.execute(""" + with ranked as( + SELECT + id, + ROW_NUMBER() OVER (PARTITION BY URL_ID ORDER BY id) as rn + FROM + USER_URL_AGENCY_SUGGESTIONS + ) + DELETE FROM user_url_agency_suggestions + USING ranked + WHERE USER_URL_AGENCY_SUGGESTIONS.id = ranked.id + and ranked.rn > 1 + """) + + # Add unique constraint to url_id column + op.create_unique_constraint('uq_user_relevant_suggestions_url_id', 'user_relevant_suggestions', ['url_id']) + op.create_unique_constraint('uq_user_record_type_suggestions_url_id', 'user_record_type_suggestions', ['url_id']) + op.create_unique_constraint('uq_user_agency_suggestions_url_id', 'user_url_agency_suggestions', ['url_id']) + + + +def downgrade() -> None: + op.drop_constraint('uq_user_relevant_suggestions_url_id', 'user_relevant_suggestions', 
type_='unique') + op.drop_constraint('uq_user_record_type_suggestions_url_id', 'user_record_type_suggestions', type_='unique') + op.drop_constraint('uq_user_agency_suggestions_url_id', 'user_url_agency_suggestions', type_='unique') \ No newline at end of file diff --git a/collector_db/AsyncDatabaseClient.py b/collector_db/AsyncDatabaseClient.py index 98410b6f..c8b4a204 100644 --- a/collector_db/AsyncDatabaseClient.py +++ b/collector_db/AsyncDatabaseClient.py @@ -137,14 +137,13 @@ async def get_next_url_for_user_annotation( URL, ) .where(URL.outcome == URLStatus.PENDING.value) - # URL must not have metadata annotation by this user + # URL must not have user suggestion .where( not_( exists( select(user_suggestion_model_to_exclude) .where( user_suggestion_model_to_exclude.url_id == URL.id, - user_suggestion_model_to_exclude.user_id == user_id ) ) ) @@ -158,7 +157,6 @@ async def get_next_url_for_user_annotation( select(UserRelevantSuggestion) .where( UserRelevantSuggestion.url_id == URL.id, - UserRelevantSuggestion.user_id == user_id, UserRelevantSuggestion.relevant == False ) ) @@ -833,15 +831,14 @@ async def get_next_url_agency_for_annotation( if batch_id is not None: statement = statement.where(URL.batch_id == batch_id) - # Must not have been annotated by this user + # Must not have been annotated by a user statement = ( statement.join(UserUrlAgencySuggestion, isouter=True) .where( ~exists( select(UserUrlAgencySuggestion). where( - (UserUrlAgencySuggestion.user_id == user_id) & - (UserUrlAgencySuggestion.url_id == URL.id) + UserUrlAgencySuggestion.url_id == URL.id ). 
correlate(URL) ) diff --git a/core/DTOs/GetNextURLForFinalReviewResponse.py b/core/DTOs/GetNextURLForFinalReviewResponse.py index 422c38ab..f7f44e32 100644 --- a/core/DTOs/GetNextURLForFinalReviewResponse.py +++ b/core/DTOs/GetNextURLForFinalReviewResponse.py @@ -23,6 +23,7 @@ class FinalReviewAnnotationRecordTypeInfo(BaseModel): title="A dictionary, sorted by size and omitting zero values, of all record types suggested by users", ) +# region Agency class FinalReviewAnnotationAgencyUserInfo(GetNextURLForAgencyAgencyInfo): count: int = Field(title="Number of times suggested by users") @@ -41,6 +42,7 @@ class FinalReviewAnnotationAgencyInfo(BaseModel): users: Optional[dict[int, FinalReviewAnnotationAgencyUserInfo]] = Field( title="A list, sorted by size, of all agencies suggested by users", ) +# endregion class FinalReviewAnnotationInfo(BaseModel): relevant: FinalReviewAnnotationRelevantInfo = Field( diff --git a/core/TaskManager.py b/core/TaskManager.py index 7796e80e..8a40b129 100644 --- a/core/TaskManager.py +++ b/core/TaskManager.py @@ -7,13 +7,13 @@ from core.DTOs.GetTasksResponse import GetTasksResponse from core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome from core.FunctionTrigger import FunctionTrigger -from core.classes.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator -from core.classes.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator -from core.classes.TaskOperatorBase import TaskOperatorBase -from core.classes.URLHTMLTaskOperator import URLHTMLTaskOperator -from core.classes.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator -from core.classes.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator -from core.classes.URLRelevanceHuggingfaceTaskOperator import URLRelevanceHuggingfaceTaskOperator +from core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator +from 
core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase +from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator +from core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator +from core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator +from core.classes.task_operators.URLRelevanceHuggingfaceTaskOperator import URLRelevanceHuggingfaceTaskOperator from core.enums import BatchStatus from html_tag_collector.ResponseParser import HTMLResponseParser from html_tag_collector.URLRequestInterface import URLRequestInterface diff --git a/core/classes/AgencyIdentificationTaskOperator.py b/core/classes/task_operators/AgencyIdentificationTaskOperator.py similarity index 98% rename from core/classes/AgencyIdentificationTaskOperator.py rename to core/classes/task_operators/AgencyIdentificationTaskOperator.py index 1589b96f..4c2d6f1b 100644 --- a/core/classes/AgencyIdentificationTaskOperator.py +++ b/core/classes/task_operators/AgencyIdentificationTaskOperator.py @@ -7,7 +7,7 @@ from collector_manager.enums import CollectorType from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo from core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO -from core.classes.TaskOperatorBase import TaskOperatorBase +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase from core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask from core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask from core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask diff --git a/core/classes/SubmitApprovedURLTaskOperator.py b/core/classes/task_operators/SubmitApprovedURLTaskOperator.py 
similarity index 97% rename from core/classes/SubmitApprovedURLTaskOperator.py rename to core/classes/task_operators/SubmitApprovedURLTaskOperator.py index 81f0b242..86e0229e 100644 --- a/core/classes/SubmitApprovedURLTaskOperator.py +++ b/core/classes/task_operators/SubmitApprovedURLTaskOperator.py @@ -2,7 +2,7 @@ from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo from collector_db.enums import TaskType from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO -from core.classes.TaskOperatorBase import TaskOperatorBase +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase from pdap_api_client.PDAPClient import PDAPClient diff --git a/core/classes/TaskOperatorBase.py b/core/classes/task_operators/TaskOperatorBase.py similarity index 100% rename from core/classes/TaskOperatorBase.py rename to core/classes/task_operators/TaskOperatorBase.py diff --git a/core/classes/URLHTMLTaskOperator.py b/core/classes/task_operators/URLHTMLTaskOperator.py similarity index 98% rename from core/classes/URLHTMLTaskOperator.py rename to core/classes/task_operators/URLHTMLTaskOperator.py index ad279f9d..f6cfa28a 100644 --- a/core/classes/URLHTMLTaskOperator.py +++ b/core/classes/task_operators/URLHTMLTaskOperator.py @@ -4,7 +4,7 @@ from collector_db.enums import TaskType from core.DTOs.task_data_objects.UrlHtmlTDO import UrlHtmlTDO from core.classes.HTMLContentInfoGetter import HTMLContentInfoGetter -from core.classes.TaskOperatorBase import TaskOperatorBase +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase from html_tag_collector.ResponseParser import HTMLResponseParser from html_tag_collector.URLRequestInterface import URLRequestInterface diff --git a/core/classes/URLMiscellaneousMetadataTaskOperator.py b/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py similarity index 97% rename from core/classes/URLMiscellaneousMetadataTaskOperator.py rename to 
core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py index 1cbebbc6..68a3a243 100644 --- a/core/classes/URLMiscellaneousMetadataTaskOperator.py +++ b/core/classes/task_operators/URLMiscellaneousMetadataTaskOperator.py @@ -5,7 +5,7 @@ from collector_db.enums import TaskType from collector_manager.enums import CollectorType from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO -from core.classes.TaskOperatorBase import TaskOperatorBase +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase from core.classes.subtasks.MiscellaneousMetadata.AutoGooglerMiscMetadataSubtask import AutoGooglerMiscMetadataSubtask from core.classes.subtasks.MiscellaneousMetadata.CKANMiscMetadataSubtask import CKANMiscMetadataSubtask from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \ diff --git a/core/classes/URLRecordTypeTaskOperator.py b/core/classes/task_operators/URLRecordTypeTaskOperator.py similarity index 97% rename from core/classes/URLRecordTypeTaskOperator.py rename to core/classes/task_operators/URLRecordTypeTaskOperator.py index 3f94811f..ab1f1f08 100644 --- a/core/classes/URLRecordTypeTaskOperator.py +++ b/core/classes/task_operators/URLRecordTypeTaskOperator.py @@ -2,7 +2,7 @@ from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo from collector_db.enums import TaskType from core.DTOs.task_data_objects.URLRecordTypeTDO import URLRecordTypeTDO -from core.classes.TaskOperatorBase import TaskOperatorBase +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase from core.enums import RecordType from llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier diff --git a/core/classes/URLRelevanceHuggingfaceTaskOperator.py b/core/classes/task_operators/URLRelevanceHuggingfaceTaskOperator.py similarity index 91% rename from core/classes/URLRelevanceHuggingfaceTaskOperator.py rename to 
core/classes/task_operators/URLRelevanceHuggingfaceTaskOperator.py index e6ebdc3d..4871a9f0 100644 --- a/core/classes/URLRelevanceHuggingfaceTaskOperator.py +++ b/core/classes/task_operators/URLRelevanceHuggingfaceTaskOperator.py @@ -1,9 +1,8 @@ from collector_db.AsyncDatabaseClient import AsyncDatabaseClient -from collector_db.DTOs.URLMetadataInfo import URLMetadataInfo from collector_db.DTOs.URLWithHTML import URLWithHTML -from collector_db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource, TaskType +from collector_db.enums import TaskType from core.DTOs.task_data_objects.URLRelevanceHuggingfaceTDO import URLRelevanceHuggingfaceTDO -from core.classes.TaskOperatorBase import TaskOperatorBase +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase from hugging_face.HuggingFaceInterface import HuggingFaceInterface diff --git a/core/classes/task_operators/__init__.py b/core/classes/task_operators/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/helpers/complex_test_data_functions.py b/tests/helpers/complex_test_data_functions.py index 18d3f92a..955e1cf6 100644 --- a/tests/helpers/complex_test_data_functions.py +++ b/tests/helpers/complex_test_data_functions.py @@ -1,3 +1,5 @@ +from typing import Optional + from pydantic import BaseModel from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo @@ -57,7 +59,7 @@ class FinalReviewSetupInfo(BaseModel): async def setup_for_get_next_url_for_final_review( db_data_creator: DBDataCreator, - annotation_count: int, + annotation_count: Optional[int] = None, include_user_annotations: bool = True, include_miscellaneous_metadata: bool = True ) -> FinalReviewSetupInfo: @@ -109,16 +111,9 @@ async def add_relevant_suggestion(count: int, relevant: bool): ) if include_user_annotations: - await add_relevant_suggestion(annotation_count, True) await add_relevant_suggestion(1, False) - await add_record_type_suggestion(3, RecordType.ARREST_RECORDS) - await 
add_record_type_suggestion(2, RecordType.DISPATCH_RECORDINGS) await add_record_type_suggestion(1, RecordType.ACCIDENT_REPORTS) - - if include_user_annotations: - # Add user suggestions for agencies, one suggested by 3 users, another by 2, another by 1 - for i in range(annotation_count): - await add_agency_suggestion(i + 1) + await add_agency_suggestion(1) return FinalReviewSetupInfo( batch_id=batch_id, diff --git a/tests/manual/html_collector/test_html_tag_collector_integration.py b/tests/manual/html_collector/test_html_tag_collector_integration.py index 8f1fc630..3ffef203 100644 --- a/tests/manual/html_collector/test_html_tag_collector_integration.py +++ b/tests/manual/html_collector/test_html_tag_collector_integration.py @@ -2,7 +2,7 @@ from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.URLInfo import URLInfo -from core.classes.URLHTMLTaskOperator import URLHTMLTaskOperator +from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator from helpers.DBDataCreator import DBDataCreator from html_tag_collector.ResponseParser import HTMLResponseParser from html_tag_collector.RootURLCache import RootURLCache diff --git a/tests/test_automated/integration/api/test_annotate.py b/tests/test_automated/integration/api/test_annotate.py index 0501ac1f..d5b6dade 100644 --- a/tests/test_automated/integration/api/test_annotate.py +++ b/tests/test_automated/integration/api/test_annotate.py @@ -68,6 +68,9 @@ async def test_annotate_relevancy(api_test_helper): # Validate that the correct relevant value is returned assert inner_info_1.suggested_relevant is True + # A second user should see the same URL + + # Annotate with value 'False' and get next URL request_info_2: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( url_id=inner_info_1.url_info.url_id, @@ -106,7 +109,6 @@ async def test_annotate_relevancy(api_test_helper): assert result_2.relevant is True # If 
user submits annotation for same URL, the URL should be overwritten - request_info_4: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( url_id=inner_info_1.url_info.url_id, relevance_annotation_post_info=RelevanceAnnotationPostInfo( @@ -420,12 +422,6 @@ async def test_annotate_agency_other_user_annotation(api_test_helper): ) url_ids = setup_info.url_ids - - await ath.db_data_creator.manual_suggestion( - user_id=MOCK_USER_ID + 1, - url_id=url_ids[0], - ) - response = await ath.request_validator.get_next_agency_annotation() assert response.next_annotation @@ -440,6 +436,16 @@ async def test_annotate_agency_other_user_annotation(api_test_helper): # Check that one agency_suggestion exists assert len(next_annotation.agency_suggestions) == 1 + # Test that another user can insert a suggestion + await ath.db_data_creator.manual_suggestion( + user_id=MOCK_USER_ID + 1, + url_id=url_ids[0], + ) + + # After this, test that our user does not receive this URL + response = await ath.request_validator.get_next_agency_annotation() + assert response.next_annotation is None + @pytest.mark.asyncio async def test_annotate_agency_submit_and_get_next(api_test_helper): """ diff --git a/tests/test_automated/integration/api/test_review.py b/tests/test_automated/integration/api/test_review.py index 61b1ef7e..494765b6 100644 --- a/tests/test_automated/integration/api/test_review.py +++ b/tests/test_automated/integration/api/test_review.py @@ -15,7 +15,6 @@ async def test_review_next_source(api_test_helper): setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=ath.db_data_creator, - annotation_count=3, include_user_annotations=True ) url_mapping = setup_info.url_mapping @@ -47,16 +46,13 @@ async def test_review_next_source(api_test_helper): annotation_info = result.annotations relevant_info = annotation_info.relevant assert relevant_info.auto == True - assert relevant_info.users.relevant == 3 assert 
relevant_info.users.not_relevant == 1 record_type_info = annotation_info.record_type assert record_type_info.auto == RecordType.ARREST_RECORDS user_d = record_type_info.users - assert user_d[RecordType.ARREST_RECORDS] == 3 - assert user_d[RecordType.DISPATCH_RECORDINGS] == 2 assert user_d[RecordType.ACCIDENT_REPORTS] == 1 - assert list(user_d.keys()) == [RecordType.ARREST_RECORDS, RecordType.DISPATCH_RECORDINGS, RecordType.ACCIDENT_REPORTS] + assert list(user_d.keys()) == [RecordType.ACCIDENT_REPORTS] agency_info = annotation_info.agency @@ -67,9 +63,7 @@ async def test_review_next_source(api_test_helper): # Check user agency suggestions exist and in descending order of count user_agency_suggestions = agency_info.users user_agency_suggestions_as_list = list(user_agency_suggestions.values()) - assert len(user_agency_suggestions_as_list) == 3 - for i in range(3): - assert user_agency_suggestions_as_list[i].count == 3 - i + assert len(user_agency_suggestions_as_list) == 1 # Check confirmed agencies exist confirmed_agencies = agency_info.confirmed diff --git a/tests/test_automated/integration/collector_db/test_db_client.py b/tests/test_automated/integration/collector_db/test_db_client.py index 7b98728f..71bed7b4 100644 --- a/tests/test_automated/integration/collector_db/test_db_client.py +++ b/tests/test_automated/integration/collector_db/test_db_client.py @@ -162,12 +162,12 @@ async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreato setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=db_data_creator, - annotation_count=3, + annotation_count=1, include_user_annotations=True ) url_mapping = setup_info.url_mapping - + # Add agency auto suggestions await db_data_creator.agency_auto_suggestions( url_id=url_mapping.url_id, count=3 @@ -186,16 +186,13 @@ async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreato annotation_info = result.annotations relevant_info = annotation_info.relevant assert 
relevant_info.auto == True - assert relevant_info.users.relevant == 3 assert relevant_info.users.not_relevant == 1 record_type_info = annotation_info.record_type assert record_type_info.auto == RecordType.ARREST_RECORDS user_d = record_type_info.users - assert user_d[RecordType.ARREST_RECORDS] == 3 - assert user_d[RecordType.DISPATCH_RECORDINGS] == 2 assert user_d[RecordType.ACCIDENT_REPORTS] == 1 - assert list(user_d.keys()) == [RecordType.ARREST_RECORDS, RecordType.DISPATCH_RECORDINGS, RecordType.ACCIDENT_REPORTS] + assert list(user_d.keys()) == [RecordType.ACCIDENT_REPORTS] agency_info = annotation_info.agency @@ -206,9 +203,7 @@ async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreato # Check user agency suggestions exist and in descending order of count user_agency_suggestions = agency_info.users user_agency_suggestions_as_list = list(user_agency_suggestions.values()) - assert len(user_agency_suggestions_as_list) == 3 - for i in range(3): - assert user_agency_suggestions_as_list[i].count == 3 - i + assert len(user_agency_suggestions_as_list) == 1 @pytest.mark.asyncio async def test_get_next_url_for_final_review_batch_id_filtering(db_data_creator: DBDataCreator): @@ -280,42 +275,6 @@ async def test_get_next_url_for_final_review_favor_more_components(db_data_creat -@pytest.mark.asyncio -async def test_get_next_url_for_final_review_favor_more_annotations( - db_data_creator: DBDataCreator, - wipe_database -): - """ - Test in the case of two URLs with the same number of components annotated, favoring the one with more total annotations - """ - setup_info_lower_count = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - annotation_count=1, - include_user_annotations=True - ) - url_mapping_lower_count = setup_info_lower_count.url_mapping - - setup_info_higher_count = await setup_for_get_next_url_for_final_review( - db_data_creator=db_data_creator, - annotation_count=3, - include_user_annotations=True - ) - 
url_mapping_higher_count = setup_info_higher_count.url_mapping - - for url_mapping in [url_mapping_lower_count, url_mapping_higher_count]: - await db_data_creator.agency_confirmed_suggestion( - url_id=url_mapping.url_id - ) - - result = await db_data_creator.adb_client.get_next_url_for_final_review( - batch_id=None - ) - - assert result.id == url_mapping_higher_count.url_id - - assert result.annotations.agency.confirmed is not None - - @pytest.mark.asyncio @@ -478,6 +437,11 @@ async def test_approval_url_error(db_data_creator: DBDataCreator): async def test_get_next_url_for_user_relevance_annotation_pending( db_data_creator: DBDataCreator ): + """ + Users should receive a valid URL to annotate + All users should receive the same next URL + Once any user annotates that URL, none of the users should receive it again + """ setup_info = await setup_for_get_next_url_for_annotation( db_data_creator=db_data_creator, url_count=2 @@ -492,11 +456,45 @@ async def test_get_next_url_for_user_relevance_annotation_pending( ) adb_client = db_data_creator.adb_client - url = await adb_client.get_next_url_for_relevance_annotation( + url_1 = await adb_client.get_next_url_for_relevance_annotation( + user_id=1, + batch_id=None + ) + assert url_1 is not None + + url_2 = await adb_client.get_next_url_for_relevance_annotation( + user_id=2, + batch_id=None + ) + assert url_2 is not None + + assert url_1.url_info.url == url_2.url_info.url + + # Annotate this URL, then check that the second URL is returned + await adb_client.add_user_relevant_suggestion( + url_id=url_1.url_info.url_id, + user_id=1, + relevant=True + ) + + url_3 = await adb_client.get_next_url_for_relevance_annotation( user_id=1, batch_id=None ) - assert url is not None + assert url_3 is not None + + assert url_1 != url_3 + + # Check that the second URL is also returned for another user + url_4 = await adb_client.get_next_url_for_relevance_annotation( + user_id=2, + batch_id=None + ) + assert url_4 is not None + + + assert 
url_4 == url_3 + + @pytest.mark.asyncio async def test_get_next_url_for_annotation_batch_filtering( @@ -643,26 +641,27 @@ async def test_annotate_url_marked_not_relevant(db_data_creator: DBDataCreator): ) assert record_type_annotation_info.url_info.url_id != url_to_mark_not_relevant.url_id - # Other users should still receive the URL for record type annotation + # Other users also should not receive the URL for record type annotation record_type_annotation_info = await adb_client.get_next_url_for_record_type_annotation( user_id=2, batch_id=None ) - assert record_type_annotation_info.url_info.url_id == url_to_mark_not_relevant.url_id + assert record_type_annotation_info.url_info.url_id != \ + url_to_mark_not_relevant.url_id, "Other users should not receive the URL for record type annotation" # User should not receive the URL for agency annotation - agency_annotation_info = await adb_client.get_next_url_agency_for_annotation( + agency_annotation_info_user_1 = await adb_client.get_next_url_agency_for_annotation( user_id=1, batch_id=None ) - assert agency_annotation_info.next_annotation.url_id != url_to_mark_not_relevant.url_id + assert agency_annotation_info_user_1.next_annotation.url_id != url_to_mark_not_relevant.url_id - # Other users should still receive the URL for agency annotation - agency_annotation_info = await adb_client.get_next_url_agency_for_annotation( + # Other users also should not receive the URL for agency annotation + agency_annotation_info_user_2 = await adb_client.get_next_url_agency_for_annotation( user_id=2, batch_id=None ) - assert agency_annotation_info.next_annotation.url_id == url_to_mark_not_relevant.url_id + assert agency_annotation_info_user_2.next_annotation.url_id != url_to_mark_not_relevant.url_id @pytest.mark.asyncio async def test_annotate_url_agency_agency_not_in_db(db_data_creator: DBDataCreator): @@ -681,4 +680,117 @@ async def test_annotate_url_agency_agency_not_in_db(db_data_creator: DBDataCreat agencies = await 
db_data_creator.adb_client.get_all(Agency) assert len(agencies) - assert agencies[0].name == PLACEHOLDER_AGENCY_NAME \ No newline at end of file + assert agencies[0].name == PLACEHOLDER_AGENCY_NAME + +@pytest.mark.asyncio +async def test_get_next_url_for_user_record_type_annotation(db_data_creator: DBDataCreator): + """ + All users should receive the same next valid URL for record type annotation + Once any user annotates that URL, none of the users should receive it + """ + setup_info = await setup_for_get_next_url_for_annotation( + db_data_creator, + url_count=2 + ) + + # All users should receive the same URL + url_1 = setup_info.insert_urls_info.url_mappings[0] + url_2 = setup_info.insert_urls_info.url_mappings[1] + + adb_client = db_data_creator.adb_client + + url_user_1 = await adb_client.get_next_url_for_record_type_annotation( + user_id=1, + batch_id=None + ) + assert url_user_1 is not None + + url_user_2 = await adb_client.get_next_url_for_record_type_annotation( + user_id=2, + batch_id=None + ) + + assert url_user_2 is not None + + # Check that the URLs are the same + assert url_user_1 == url_user_2 + + # After annotating, both users should receive a different URL + await adb_client.add_user_record_type_suggestion( + user_id=1, + url_id=url_1.url_id, + record_type=RecordType.ARREST_RECORDS + ) + + next_url_user_1 = await adb_client.get_next_url_for_record_type_annotation( + user_id=1, + batch_id=None + ) + + next_url_user_2 = await adb_client.get_next_url_for_record_type_annotation( + user_id=2, + batch_id=None + ) + + assert next_url_user_1 != url_user_1 + assert next_url_user_1 == next_url_user_2 + + + + + +@pytest.mark.asyncio +async def test_get_next_url_for_user_agency_annotation(db_data_creator: DBDataCreator): + """ + All users should receive the same next valid URL for agency annotation + Once any user annotates that URL, none of the users should receive it + """ + setup_info = await setup_for_annotate_agency( + db_data_creator, + url_count=2 + ) + 
+ # All users should receive the same URL + url_1 = setup_info.url_ids[0] + url_2 = setup_info.url_ids[1] + + adb_client = db_data_creator.adb_client + url_user_1 = await adb_client.get_next_url_agency_for_annotation( + user_id=1, + batch_id=None + ) + assert url_user_1 is not None + + url_user_2 = await adb_client.get_next_url_agency_for_annotation( + user_id=2, + batch_id=None + ) + + assert url_user_2 is not None + + # Check that the URLs are the same + assert url_user_1 == url_user_2 + + # Annotate the URL + await adb_client.add_agency_manual_suggestion( + url_id=url_1, + user_id=1, + is_new=True, + agency_id=None + ) + + # Both users should receive the next URL + next_url_user_1 = await adb_client.get_next_url_agency_for_annotation( + user_id=1, + batch_id=None + ) + assert next_url_user_1 is not None + + next_url_user_2 = await adb_client.get_next_url_agency_for_annotation( + user_id=2, + batch_id=None + ) + assert next_url_user_2 is not None + + assert url_user_1 != next_url_user_1 + assert next_url_user_1 == next_url_user_2 diff --git a/tests/test_automated/integration/tasks/test_agency_preannotation_task.py b/tests/test_automated/integration/tasks/test_agency_preannotation_task.py index 1c1289e7..8fb9f4a5 100644 --- a/tests/test_automated/integration/tasks/test_agency_preannotation_task.py +++ b/tests/test_automated/integration/tasks/test_agency_preannotation_task.py @@ -1,4 +1,3 @@ -import types from copy import deepcopy from typing import Optional from unittest.mock import MagicMock, AsyncMock, patch @@ -11,7 +10,7 @@ from collector_manager.enums import CollectorType from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo -from core.classes.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator +from core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator from 
core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask from core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask from core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask diff --git a/tests/test_automated/integration/tasks/test_example_task.py b/tests/test_automated/integration/tasks/test_example_task.py index 819d0dc0..2211458c 100644 --- a/tests/test_automated/integration/tasks/test_example_task.py +++ b/tests/test_automated/integration/tasks/test_example_task.py @@ -4,8 +4,7 @@ from collector_db.enums import TaskType from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.TaskOperatorBase import TaskOperatorBase -from core.enums import BatchStatus +from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase from tests.helpers.DBDataCreator import DBDataCreator class ExampleTaskOperator(TaskOperatorBase): diff --git a/tests/test_automated/integration/tasks/test_submit_approved_url_task.py b/tests/test_automated/integration/tasks/test_submit_approved_url_task.py index b15ff9d5..2d3aa192 100644 --- a/tests/test_automated/integration/tasks/test_submit_approved_url_task.py +++ b/tests/test_automated/integration/tasks/test_submit_approved_url_task.py @@ -8,7 +8,7 @@ from collector_manager.enums import URLStatus from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator +from core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator from core.enums import RecordType, SubmitResponseStatus from tests.helpers.DBDataCreator import BatchURLCreationInfo, DBDataCreator from pdap_api_client.AccessManager import AccessManager diff --git a/tests/test_automated/integration/tasks/test_url_html_task.py 
b/tests/test_automated/integration/tasks/test_url_html_task.py index 3839d0a6..4c33016b 100644 --- a/tests/test_automated/integration/tasks/test_url_html_task.py +++ b/tests/test_automated/integration/tasks/test_url_html_task.py @@ -6,7 +6,7 @@ from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.enums import TaskType from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.URLHTMLTaskOperator import URLHTMLTaskOperator +from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator from tests.helpers.DBDataCreator import DBDataCreator from html_tag_collector.DataClassTags import ResponseHTMLInfo from html_tag_collector.ResponseParser import HTMLResponseParser diff --git a/tests/test_automated/integration/tasks/test_url_miscellaneous_metadata_task.py b/tests/test_automated/integration/tasks/test_url_miscellaneous_metadata_task.py index 818d5aef..526efa70 100644 --- a/tests/test_automated/integration/tasks/test_url_miscellaneous_metadata_task.py +++ b/tests/test_automated/integration/tasks/test_url_miscellaneous_metadata_task.py @@ -5,7 +5,7 @@ from collector_db.models import URL, URLOptionalDataSourceMetadata from collector_manager.enums import CollectorType from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator +from core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator from tests.helpers.DBDataCreator import DBDataCreator diff --git a/tests/test_automated/integration/tasks/test_url_record_type_task.py b/tests/test_automated/integration/tasks/test_url_record_type_task.py index c56acec1..c941bcf7 100644 --- a/tests/test_automated/integration/tasks/test_url_record_type_task.py +++ b/tests/test_automated/integration/tasks/test_url_record_type_task.py @@ -5,8 +5,8 @@ from collector_db.enums import TaskType from collector_db.models import 
AutoRecordTypeSuggestion from core.DTOs.TaskOperatorRunInfo import TaskOperatorOutcome -from core.classes.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator -from core.enums import RecordType, BatchStatus +from core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator +from core.enums import RecordType from tests.helpers.DBDataCreator import DBDataCreator from llm_api_logic.DeepSeekRecordClassifier import DeepSeekRecordClassifier diff --git a/tests/test_automated/integration/tasks/test_url_relevancy_huggingface_task.py b/tests/test_automated/integration/tasks/test_url_relevancy_huggingface_task.py index 11ef770a..abe15965 100644 --- a/tests/test_automated/integration/tasks/test_url_relevancy_huggingface_task.py +++ b/tests/test_automated/integration/tasks/test_url_relevancy_huggingface_task.py @@ -4,10 +4,9 @@ from collector_db.AsyncDatabaseClient import AsyncDatabaseClient from collector_db.DTOs.URLWithHTML import URLWithHTML -from collector_db.enums import ValidationStatus, ValidationSource from collector_db.models import AutoRelevantSuggestion from core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome -from core.classes.URLRelevanceHuggingfaceTaskOperator import URLRelevanceHuggingfaceTaskOperator +from core.classes.task_operators.URLRelevanceHuggingfaceTaskOperator import URLRelevanceHuggingfaceTaskOperator from tests.helpers.assert_functions import assert_database_has_no_tasks from hugging_face.HuggingFaceInterface import HuggingFaceInterface