diff --git a/alembic/versions/2025_05_16_1031-00cc949e0347_update_relevancy_logic.py b/alembic/versions/2025_05_16_1031-00cc949e0347_update_relevancy_logic.py new file mode 100644 index 00000000..5ba1240f --- /dev/null +++ b/alembic/versions/2025_05_16_1031-00cc949e0347_update_relevancy_logic.py @@ -0,0 +1,162 @@ +"""Update relevancy logic + +- Add new status to URL Status outcome: `individual record` +- Change URL Status value `rejected` to `not relevant` , for specificity +- Create `user_suggested_status` enum +- ` Add `suggested_status` column to `user_relevant_suggestions` +- Migrate `user_relevant_suggestions:relevant` to `user_relevant_suggestions:user_suggested_status` + +Revision ID: 00cc949e0347 +Revises: b5f079b6b8cb +Create Date: 2025-05-16 10:31:04.417203 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from util.alembic_helpers import switch_enum_type + +# revision identifiers, used by Alembic. +revision: str = '00cc949e0347' +down_revision: Union[str, None] = 'b5f079b6b8cb' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +suggested_status_enum = sa.Enum( + 'relevant', + 'not relevant', + 'individual record', + 'broken page/404 not found', + name='suggested_status' +) + +def upgrade() -> None: + suggested_status_enum.create(op.get_bind()) + # Replace `relevant` column with `suggested_status` column + op.add_column( + 'user_relevant_suggestions', + sa.Column( + 'suggested_status', + suggested_status_enum, + nullable=True + ) + ) + # Migrate existing entries + op.execute(""" + UPDATE user_relevant_suggestions + SET suggested_status = 'relevant' + WHERE relevant = true + """) + op.execute(""" + UPDATE user_relevant_suggestions + SET suggested_status = 'not relevant' + WHERE relevant = false + """) + op.alter_column( + 'user_relevant_suggestions', + 'suggested_status', + nullable=False + ) + op.drop_column( + 'user_relevant_suggestions', + 'relevant' + ) + + # Update `url_status` enum to include + # `individual record` + # And change `rejected` to `not relevant` + op.execute(""" + ALTER TYPE url_status RENAME VALUE 'rejected' TO 'not relevant'; + """) + switch_enum_type( + table_name='urls', + column_name='outcome', + enum_name='url_status', + new_enum_values=[ + 'pending', + 'submitted', + 'validated', + 'duplicate', + 'not relevant', + 'error', + '404 not found', + 'individual record' + ], + check_constraints_to_drop=['url_name_not_null_when_validated'] + ) + op.execute( + """ + ALTER TABLE urls + ADD CONSTRAINT url_name_not_null_when_validated + CHECK ((name IS NOT NULL) OR (outcome <> 'validated'::url_status)) + """ + ) + + +def downgrade() -> None: + # Update `url_status` enum to remove + # `individual record` + # And change `not relevant` to `rejected` + op.execute(""" + ALTER TYPE url_status RENAME VALUE 'not relevant' TO 'rejected'; + """) + op.execute(""" + UPDATE urls + SET outcome = 'rejected' + WHERE outcome = 'individual record' + """) + switch_enum_type( + table_name='urls', + column_name='outcome', + enum_name='url_status', + new_enum_values=[ + 'pending', + 'submitted', + 'validated', + 'duplicate', + 'rejected', + 'error', + '404 not found', + ], + check_constraints_to_drop=['url_name_not_null_when_validated'] + ) + op.execute( + """ + ALTER TABLE urls + ADD CONSTRAINT url_name_not_null_when_validated + CHECK ((name IS NOT NULL) OR (outcome <> 'validated'::url_status)) + """ + ) + + # Replace `suggested_status` column with `relevant` column + op.add_column( + 'user_relevant_suggestions', + sa.Column( + 'relevant', + sa.BOOLEAN(), + nullable=True + ) + ) + op.execute(""" + UPDATE user_relevant_suggestions + SET relevant = true + WHERE suggested_status = 'relevant' + """) + op.execute(""" + UPDATE user_relevant_suggestions + SET relevant = false + WHERE suggested_status = 'not relevant' + """) + op.alter_column( + 'user_relevant_suggestions', + 'relevant', + nullable=False + ) + op.drop_column( + 'user_relevant_suggestions', + 'suggested_status' + ) + suggested_status_enum.drop(op.get_bind(), checkfirst=True) diff --git a/api/routes/annotate.py b/api/routes/annotate.py index 95512a0b..7cb5fa65 100644 --- a/api/routes/annotate.py +++ b/api/routes/annotate.py @@ -55,7 +55,7 @@ async def annotate_url_for_relevance_and_get_next_url( await async_core.submit_url_relevance_annotation( user_id=access_info.user_id, url_id=url_id, - relevant=relevance_annotation_post_info.is_relevant + suggested_status=relevance_annotation_post_info.suggested_status ) return await async_core.get_next_url_for_relevance_annotation( user_id=access_info.user_id, diff --git a/api/routes/review.py b/api/routes/review.py index 62bf5de6..ac937701 100644 --- a/api/routes/review.py +++ b/api/routes/review.py @@ -4,7 +4,7 @@ from api.dependencies import get_async_core from core.AsyncCore import AsyncCore -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo, FinalReviewRejectionInfo from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewResponse, \ GetNextURLForFinalReviewOuterResponse from security_manager.SecurityManager import AccessInfo, get_access_info, require_permission, Permissions @@ -50,7 +50,7 @@ async def approve_source( async def reject_source( core: AsyncCore = Depends(get_async_core), access_info: AccessInfo = Depends(requires_final_review_permission), - review_info: FinalReviewBaseInfo = FinalReviewBaseInfo, + review_info: FinalReviewRejectionInfo = FinalReviewRejectionInfo, batch_id: Optional[int] = Query( description="The batch id of the next URL to get. " "If not specified, defaults to first qualifying URL", @@ -59,6 +59,7 @@ async def reject_source( await core.reject_url( url_id=review_info.url_id, access_info=access_info, + rejection_reason=review_info.rejection_reason ) next_source = await core.get_next_source_for_review(batch_id=batch_id) return GetNextURLForFinalReviewOuterResponse(next_source=next_source) \ No newline at end of file diff --git a/collector_db/AsyncDatabaseClient.py b/collector_db/AsyncDatabaseClient.py index 2fad609d..b47f9ae4 100644 --- a/collector_db/AsyncDatabaseClient.py +++ b/collector_db/AsyncDatabaseClient.py @@ -33,7 +33,7 @@ BacklogSnapshot, URLDataSource, URLCheckedForDuplicate, URLProbedFor404 from collector_manager.enums import URLStatus, CollectorType from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason from core.DTOs.GetMetricsBacklogResponse import GetMetricsBacklogResponseDTO, GetMetricsBacklogResponseInnerDTO from core.DTOs.GetMetricsBatchesAggregatedResponseDTO import GetMetricsBatchesAggregatedResponseDTO, \ GetMetricsBatchesAggregatedInnerResponseDTO @@ -65,7 +65,7 @@ from core.DTOs.task_data_objects.URLDuplicateTDO import URLDuplicateTDO from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO, URLHTMLMetadataInfo from core.EnvVarManager import EnvVarManager -from core.enums import BatchStatus, SuggestionType, RecordType +from core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus from html_tag_collector.DataClassTags import convert_to_response_html_info # Type Hints @@ -170,7 +170,7 @@ async def get_next_url_for_user_annotation( select(UserRelevantSuggestion) .where( UserRelevantSuggestion.url_id == URL.id, - UserRelevantSuggestion.relevant == False + UserRelevantSuggestion.suggested_status != SuggestedStatus.RELEVANT.value ) ) ) @@ -194,7 +194,7 @@ async def add_user_relevant_suggestion( session: AsyncSession, url_id: int, user_id: int, - relevant: bool + suggested_status: SuggestedStatus ): prior_suggestion = await self.get_user_suggestion( session, @@ -203,13 +203,13 @@ async def add_user_relevant_suggestion( url_id=url_id ) if prior_suggestion is not None: - prior_suggestion.relevant = relevant + prior_suggestion.suggested_status = suggested_status.value return suggestion = UserRelevantSuggestion( url_id=url_id, user_id=user_id, - relevant=relevant + suggested_status=suggested_status.value ) session.add(suggestion) @@ -881,7 +881,7 @@ async def get_next_url_agency_for_annotation( where( (UserRelevantSuggestion.user_id == user_id) & (UserRelevantSuggestion.url_id == URL.id) & - (UserRelevantSuggestion.relevant == False) + (UserRelevantSuggestion.suggested_status != SuggestedStatus.RELEVANT.value) ).correlate(URL) ) ) @@ -1288,7 +1288,8 @@ async def reject_url( self, session: AsyncSession, url_id: int, - user_id: int + user_id: int, + rejection_reason: RejectionReason ) -> None: query = ( @@ -1299,7 +1300,18 @@ async def reject_url( url = await session.execute(query) url = url.scalars().first() - url.outcome = URLStatus.REJECTED.value + match rejection_reason: + case RejectionReason.INDIVIDUAL_RECORD: + url.outcome = URLStatus.INDIVIDUAL_RECORD.value + case RejectionReason.BROKEN_PAGE_404: + url.outcome = URLStatus.NOT_FOUND.value + case RejectionReason.NOT_RELEVANT: + url.outcome = URLStatus.NOT_RELEVANT.value + case _: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Invalid rejection reason" + ) # Add rejecting user rejecting_user_url = ReviewingUserURL( @@ -1741,12 +1753,12 @@ async def add_all_annotations_to_url( relevant_suggestion = UserRelevantSuggestion( url_id=url_id, user_id=user_id, - relevant=post_info.is_relevant + suggested_status=post_info.suggested_status.value ) session.add(relevant_suggestion) # If not relevant, do nothing else - if not post_info.is_relevant: + if not post_info.suggested_status == SuggestedStatus.RELEVANT: return record_type_suggestion = UserRecordTypeSuggestion( @@ -1872,7 +1884,7 @@ def url_column(status: URLStatus, label): url_column(URLStatus.ERROR, label="error_count"), url_column(URLStatus.VALIDATED, label="validated_count"), url_column(URLStatus.SUBMITTED, label="submitted_count"), - url_column(URLStatus.REJECTED, label="rejected_count"), + url_column(URLStatus.NOT_RELEVANT, label="rejected_count"), ).outerjoin( Batch, Batch.id == URL.batch_id @@ -1957,7 +1969,7 @@ def url_column(status: URLStatus, label): sc.count_distinct(URL.id, label="count_total"), url_column(URLStatus.PENDING, label="count_pending"), url_column(URLStatus.SUBMITTED, label="count_submitted"), - url_column(URLStatus.REJECTED, label="count_rejected"), + url_column(URLStatus.NOT_RELEVANT, label="count_rejected"), url_column(URLStatus.ERROR, label="count_error"), url_column(URLStatus.VALIDATED, label="count_validated"), ).group_by( @@ -2075,7 +2087,7 @@ def case_column(status: URLStatus, label): case_column(URLStatus.PENDING, label="count_pending"), case_column(URLStatus.SUBMITTED, label="count_submitted"), case_column(URLStatus.VALIDATED, label="count_validated"), - case_column(URLStatus.REJECTED, label="count_rejected"), + case_column(URLStatus.NOT_RELEVANT, label="count_rejected"), case_column(URLStatus.ERROR, label="count_error"), ) raw_results = await session.execute(count_query) diff --git a/collector_db/DTOConverter.py b/collector_db/DTOConverter.py index 307e8f85..d06ac6de 100644 --- a/collector_db/DTOConverter.py +++ b/collector_db/DTOConverter.py @@ -26,7 +26,7 @@ def final_review_annotation_relevant_info( ) -> FinalReviewAnnotationRelevantInfo: auto_value = auto_suggestion.relevant if auto_suggestion else None - user_value = user_suggestion.relevant if user_suggestion else None + user_value = user_suggestion.suggested_status if user_suggestion else None return FinalReviewAnnotationRelevantInfo( auto=auto_value, user=user_value diff --git a/collector_db/models.py b/collector_db/models.py index de4dedae..9134a0ad 100644 --- a/collector_db/models.py +++ b/collector_db/models.py @@ -96,10 +96,11 @@ class URL(Base): 'pending', 'submitted', 'validated', - 'rejected', + 'not relevant', 'duplicate', 'error', '404 not found', + 'individual record', name='url_status' ), nullable=False @@ -457,7 +458,16 @@ class UserRelevantSuggestion(Base): id = Column(Integer, primary_key=True, autoincrement=True) url_id = Column(Integer, ForeignKey("urls.id"), nullable=False) user_id = Column(Integer, nullable=False) - relevant = Column(Boolean, nullable=False) + suggested_status = Column( + postgresql.ENUM( + 'relevant', + 'not relevant', + 'individual record', + 'broken page/404 not found', + name='suggested_status' + ), + nullable=True + ) created_at = get_created_at_column() updated_at = get_updated_at_column() diff --git a/collector_manager/enums.py b/collector_manager/enums.py index 3e852b24..1732bd19 100644 --- a/collector_manager/enums.py +++ b/collector_manager/enums.py @@ -16,5 +16,6 @@ class URLStatus(Enum): VALIDATED = "validated" ERROR = "error" DUPLICATE = "duplicate" - REJECTED = "rejected" + NOT_RELEVANT = "not relevant" NOT_FOUND = "404 not found" + INDIVIDUAL_RECORD = "individual record" diff --git a/core/AsyncCore.py b/core/AsyncCore.py index e7b7f534..651cd3ba 100644 --- a/core/AsyncCore.py +++ b/core/AsyncCore.py @@ -11,7 +11,7 @@ from collector_manager.enums import CollectorType from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo from core.DTOs.CollectorStartInfo import CollectorStartInfo -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse @@ -35,7 +35,7 @@ from core.DTOs.SearchURLResponse import SearchURLResponse from core.TaskManager import TaskManager from core.classes.ErrorManager import ErrorManager -from core.enums import BatchStatus, RecordType, AnnotationType +from core.enums import BatchStatus, RecordType, AnnotationType, SuggestedStatus from security_manager.SecurityManager import AccessInfo @@ -155,13 +155,13 @@ async def submit_url_relevance_annotation( self, user_id: int, url_id: int, - relevant: bool + suggested_status: SuggestedStatus ): try: return await self.adb_client.add_user_relevant_suggestion( user_id=user_id, url_id=url_id, - relevant=relevant + suggested_status=suggested_status ) except IntegrityError as e: return await ErrorManager.raise_annotation_exists_error( @@ -287,10 +287,12 @@ async def reject_url( self, url_id: int, access_info: AccessInfo, + rejection_reason: RejectionReason ): await self.adb_client.reject_url( url_id=url_id, - user_id=access_info.user_id + user_id=access_info.user_id, + rejection_reason=rejection_reason ) async def upload_manual_batch( diff --git a/core/DTOs/AllAnnotationPostInfo.py b/core/DTOs/AllAnnotationPostInfo.py index a462b40b..2a81be78 100644 --- a/core/DTOs/AllAnnotationPostInfo.py +++ b/core/DTOs/AllAnnotationPostInfo.py @@ -5,31 +5,31 @@ from pydantic import BaseModel, model_validator from core.DTOs.GetNextURLForAgencyAnnotationResponse import URLAgencyAnnotationPostInfo -from core.enums import RecordType +from core.enums import RecordType, SuggestedStatus from core.exceptions import FailedValidationException class AllAnnotationPostInfo(BaseModel): - is_relevant: bool + suggested_status: SuggestedStatus record_type: Optional[RecordType] = None agency: Optional[URLAgencyAnnotationPostInfo] = None - @model_validator(mode="before") - def allow_record_type_and_agency_only_if_relevant(cls, values): - is_relevant = values.get("is_relevant") - record_type = values.get("record_type") - agency = values.get("agency") + @model_validator(mode="after") + def allow_record_type_and_agency_only_if_relevant(self): + suggested_status = self.suggested_status + record_type = self.record_type + agency = self.agency - if not is_relevant: + if suggested_status != SuggestedStatus.RELEVANT: if record_type is not None: - raise FailedValidationException("record_type must be None if is_relevant is False") + raise FailedValidationException("record_type must be None if suggested_status is not relevant") if agency is not None: - raise FailedValidationException("agency must be None if is_relevant is False") - return values + raise FailedValidationException("agency must be None if suggested_status is not relevant") + return self # Similarly, if relevant, record_type and agency must be provided if record_type is None: - raise FailedValidationException("record_type must be provided if is_relevant is True") + raise FailedValidationException("record_type must be provided if suggested_status is relevant") if agency is None: - raise FailedValidationException("agency must be provided if is_relevant is True") - return values \ No newline at end of file + raise FailedValidationException("agency must be provided if suggested_status is relevant") + return self \ No newline at end of file diff --git a/core/DTOs/FinalReviewApprovalInfo.py b/core/DTOs/FinalReviewApprovalInfo.py index d87fb628..5e4a19d6 100644 --- a/core/DTOs/FinalReviewApprovalInfo.py +++ b/core/DTOs/FinalReviewApprovalInfo.py @@ -1,3 +1,4 @@ +from enum import Enum from typing import Optional from pydantic import BaseModel, Field @@ -9,6 +10,14 @@ class FinalReviewBaseInfo(BaseModel): title="The id of the URL." ) +class RejectionReason(Enum): + NOT_RELEVANT = "NOT_RELEVANT" + BROKEN_PAGE_404 = "BROKEN_PAGE" + INDIVIDUAL_RECORD = "INDIVIDUAL_RECORD" + +class FinalReviewRejectionInfo(FinalReviewBaseInfo): + rejection_reason: RejectionReason = RejectionReason.NOT_RELEVANT + class FinalReviewApprovalInfo(FinalReviewBaseInfo): record_type: Optional[RecordType] = Field( title="The final record type of the URL." diff --git a/core/DTOs/GetNextURLForFinalReviewResponse.py b/core/DTOs/GetNextURLForFinalReviewResponse.py index c9e838b6..f7e84d1f 100644 --- a/core/DTOs/GetNextURLForFinalReviewResponse.py +++ b/core/DTOs/GetNextURLForFinalReviewResponse.py @@ -3,13 +3,13 @@ from pydantic import BaseModel, Field from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo -from core.enums import RecordType +from core.enums import RecordType, SuggestedStatus from html_tag_collector.DataClassTags import ResponseHTMLInfo class FinalReviewAnnotationRelevantInfo(BaseModel): auto: Optional[bool] = Field(title="Whether the auto-labeler has marked the URL as relevant") - user: Optional[bool] = Field( - title="Whether a user has marked the URL as relevant", + user: Optional[SuggestedStatus] = Field( + title="The status marked by a user, if any", ) class FinalReviewAnnotationRecordTypeInfo(BaseModel): diff --git a/core/DTOs/RelevanceAnnotationPostInfo.py b/core/DTOs/RelevanceAnnotationPostInfo.py index 1aec0d51..29d0e764 100644 --- a/core/DTOs/RelevanceAnnotationPostInfo.py +++ b/core/DTOs/RelevanceAnnotationPostInfo.py @@ -1,5 +1,7 @@ from pydantic import BaseModel +from core.enums import SuggestedStatus + class RelevanceAnnotationPostInfo(BaseModel): - is_relevant: bool \ No newline at end of file + suggested_status: SuggestedStatus \ No newline at end of file diff --git a/core/enums.py b/core/enums.py index 019572b8..c6f90c80 100644 --- a/core/enums.py +++ b/core/enums.py @@ -70,4 +70,13 @@ class SubmitResponseStatus(Enum): """ SUCCESS = "success" FAILURE = "FAILURE" - ALREADY_EXISTS = "already_exists" \ No newline at end of file + ALREADY_EXISTS = "already_exists" + +class SuggestedStatus(Enum): + """ + Possible values for user_relevant_suggestions:suggested_status + """ + RELEVANT = "relevant" + NOT_RELEVANT = "not relevant" + INDIVIDUAL_RECORD = "individual record" + BROKEN_PAGE_404 = "broken page/404 not found" \ No newline at end of file diff --git a/tests/helpers/DBDataCreator.py b/tests/helpers/DBDataCreator.py index 38d70cfe..2ccf47fa 100644 --- a/tests/helpers/DBDataCreator.py +++ b/tests/helpers/DBDataCreator.py @@ -16,11 +16,11 @@ from collector_db.DatabaseClient import DatabaseClient from collector_db.enums import TaskType from collector_manager.enums import CollectorType, URLStatus -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, RejectionReason from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmittedURLInfo from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO -from core.enums import BatchStatus, SuggestionType, RecordType +from core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, AnnotationInfo from tests.helpers.simple_test_data_functions import generate_test_urls @@ -175,7 +175,7 @@ async def auto_relevant_suggestions(self, url_id: int, relevant: bool = True): async def annotate(self, url_id: int, annotation_info: AnnotationInfo): info = annotation_info if info.user_relevant is not None: - await self.user_relevant_suggestion(url_id=url_id, relevant=info.user_relevant) + await self.user_relevant_suggestion_v2(url_id=url_id, suggested_status=info.user_relevant) if info.auto_relevant is not None: await self.auto_relevant_suggestions(url_id=url_id, relevant=info.auto_relevant) if info.user_record_type is not None: @@ -203,7 +203,8 @@ async def annotate(self, url_id: int, annotation_info: AnnotationInfo): else: await self.adb_client.reject_url( url_id=url_id, - user_id=1 + user_id=1, + rejection_reason=RejectionReason.NOT_RELEVANT ) @@ -212,13 +213,25 @@ async def user_relevant_suggestion( url_id: int, user_id: Optional[int] = None, relevant: bool = True + ): + await self.user_relevant_suggestion_v2( + url_id=url_id, + user_id=user_id, + suggested_status=SuggestedStatus.RELEVANT if relevant else SuggestedStatus.NOT_RELEVANT + ) + + async def user_relevant_suggestion_v2( + self, + url_id: int, + user_id: Optional[int] = None, + suggested_status: SuggestedStatus = SuggestedStatus.RELEVANT ): if user_id is None: user_id = randint(1, 99999999) await self.adb_client.add_user_relevant_suggestion( url_id=url_id, user_id=user_id, - relevant=relevant + suggested_status=suggested_status ) async def user_record_type_suggestion( diff --git a/tests/helpers/test_batch_creation_parameters.py b/tests/helpers/test_batch_creation_parameters.py index cfb4805e..5d679569 100644 --- a/tests/helpers/test_batch_creation_parameters.py +++ b/tests/helpers/test_batch_creation_parameters.py @@ -4,11 +4,11 @@ from pydantic import BaseModel, model_validator from collector_manager.enums import URLStatus, CollectorType -from core.enums import BatchStatus, AnnotationType, RecordType +from core.enums import BatchStatus, AnnotationType, RecordType, SuggestedStatus class AnnotationInfo(BaseModel): - user_relevant: Optional[bool] = None + user_relevant: Optional[SuggestedStatus] = None auto_relevant: Optional[bool] = None user_record_type: Optional[RecordType] = None auto_record_type: Optional[RecordType] = None @@ -37,7 +37,7 @@ class TestURLCreationParameters(BaseModel): @model_validator(mode='after') def validate_annotation_info(self): - if self.status == URLStatus.REJECTED: + if self.status == URLStatus.NOT_RELEVANT: self.annotation_info.final_review_approved = False return self if self.status != URLStatus.VALIDATED: diff --git a/tests/test_automated/integration/api/helpers/RequestValidator.py b/tests/test_automated/integration/api/helpers/RequestValidator.py index 9207305a..3717da50 100644 --- a/tests/test_automated/integration/api/helpers/RequestValidator.py +++ b/tests/test_automated/integration/api/helpers/RequestValidator.py @@ -12,7 +12,7 @@ from collector_manager.DTOs.ExampleInputDTO import ExampleInputDTO from collector_manager.enums import CollectorType from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo, FinalReviewRejectionInfo from core.DTOs.GetBatchLogsResponse import GetBatchLogsResponse from core.DTOs.GetBatchStatusResponse import GetBatchStatusResponse from core.DTOs.GetDuplicatesByBatchResponse import GetDuplicatesByBatchResponse @@ -350,7 +350,7 @@ async def approve_and_get_next_source_for_review( async def reject_and_get_next_source_for_review( self, - review_info: FinalReviewBaseInfo + review_info: FinalReviewRejectionInfo ) -> GetNextURLForFinalReviewOuterResponse: data = self.post( url=f"/review/reject-source", diff --git a/tests/test_automated/integration/api/test_annotate.py b/tests/test_automated/integration/api/test_annotate.py index 03088cd7..90181951 100644 --- a/tests/test_automated/integration/api/test_annotate.py +++ b/tests/test_automated/integration/api/test_annotate.py @@ -13,7 +13,7 @@ from core.DTOs.RecordTypeAnnotationPostInfo import RecordTypeAnnotationPostInfo from core.DTOs.RelevanceAnnotationPostInfo import RelevanceAnnotationPostInfo from core.classes.ErrorManager import ErrorTypes -from core.enums import RecordType, SuggestionType +from core.enums import RecordType, SuggestionType, SuggestedStatus from core.exceptions import FailedValidationException from tests.helpers.complex_test_data_functions import AnnotateAgencySetupInfo, setup_for_annotate_agency, \ setup_for_get_next_url_for_final_review @@ -81,7 +81,7 @@ async def test_annotate_relevancy(api_test_helper): request_info_2: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( url_id=inner_info_1.url_info.url_id, relevance_annotation_post_info=RelevanceAnnotationPostInfo( - is_relevant=False + suggested_status=SuggestedStatus.NOT_RELEVANT ) ) @@ -96,7 +96,7 @@ async def test_annotate_relevancy(api_test_helper): request_info_3: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( url_id=inner_info_2.url_info.url_id, relevance_annotation_post_info=RelevanceAnnotationPostInfo( - is_relevant=True + suggested_status=SuggestedStatus.RELEVANT ) ) @@ -109,16 +109,16 @@ async def test_annotate_relevancy(api_test_helper): result_2 = results[1] assert result_1.url_id == inner_info_1.url_info.url_id - assert result_1.relevant is False + assert result_1.suggested_status == SuggestedStatus.NOT_RELEVANT.value assert result_2.url_id == inner_info_2.url_info.url_id - assert result_2.relevant is True + assert result_2.suggested_status == SuggestedStatus.RELEVANT.value # If user submits annotation for same URL, the URL should be overwritten request_info_4: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next( url_id=inner_info_1.url_info.url_id, relevance_annotation_post_info=RelevanceAnnotationPostInfo( - is_relevant=True + suggested_status=SuggestedStatus.RELEVANT ) ) @@ -129,8 +129,47 @@ async def test_annotate_relevancy(api_test_helper): for result in results: if result.url_id == inner_info_1.url_info.url_id: - assert results[0].relevant is True + assert results[0].suggested_status == SuggestedStatus.RELEVANT.value +async def post_and_validate_relevancy_annotation(ath, url_id, annotation: SuggestedStatus): + response = ath.request_validator.post_relevance_annotation_and_get_next( + url_id=url_id, + relevance_annotation_post_info=RelevanceAnnotationPostInfo( + suggested_status=annotation + ) + ) + + assert response.next_annotation is None + + results: list[UserRelevantSuggestion] = await ath.adb_client().get_all(UserRelevantSuggestion) + assert len(results) == 1 + assert results[0].suggested_status == annotation.value + +@pytest.mark.asyncio +async def test_annotate_relevancy_broken_page(api_test_helper): + ath = api_test_helper + + creation_info = await ath.db_data_creator.batch_and_urls(url_count=1, with_html_content=False) + + await post_and_validate_relevancy_annotation( + ath, + url_id=creation_info.url_ids[0], + annotation=SuggestedStatus.BROKEN_PAGE_404 + ) + +@pytest.mark.asyncio +async def test_annotate_relevancy_individual_record(api_test_helper): + ath = api_test_helper + + creation_info: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1 + ) + + await post_and_validate_relevancy_annotation( + ath, + url_id=creation_info.url_ids[0], + annotation=SuggestedStatus.INDIVIDUAL_RECORD + ) @pytest.mark.asyncio async def test_annotate_relevancy_already_annotated_by_different_user( @@ -153,7 +192,7 @@ async def test_annotate_relevancy_already_annotated_by_different_user( response = await ath.request_validator.post_relevance_annotation_and_get_next( url_id=creation_info.url_ids[0], relevance_annotation_post_info=RelevanceAnnotationPostInfo( - is_relevant=False + suggested_status=SuggestedStatus.NOT_RELEVANT ) ) except HTTPException as e: @@ -613,7 +652,7 @@ async def test_annotate_all(api_test_helper): post_response_1 = await ath.request_validator.post_all_annotations_and_get_next( url_id=url_mapping_1.url_id, all_annotations_post_info=AllAnnotationPostInfo( - is_relevant=True, + suggested_status=SuggestedStatus.RELEVANT, record_type=RecordType.ACCIDENT_REPORTS, agency=URLAgencyAnnotationPostInfo( is_new=False, @@ -630,7 +669,7 @@ async def test_annotate_all(api_test_helper): post_response_2 = await ath.request_validator.post_all_annotations_and_get_next( url_id=url_mapping_2.url_id, all_annotations_post_info=AllAnnotationPostInfo( - is_relevant=False, + suggested_status=SuggestedStatus.NOT_RELEVANT, ) ) assert post_response_2.next_annotation is None @@ -642,10 +681,10 @@ async def test_annotate_all(api_test_helper): # Check that all annotations are present in the database # Should be two relevance annotations, one True and one False - all_relevance_suggestions = await adb_client.get_all(UserRelevantSuggestion) + all_relevance_suggestions: list[UserRelevantSuggestion] = await adb_client.get_all(UserRelevantSuggestion) assert len(all_relevance_suggestions) == 2 - assert all_relevance_suggestions[0].relevant == True - assert all_relevance_suggestions[1].relevant == False + assert all_relevance_suggestions[0].suggested_status == SuggestedStatus.RELEVANT.value + assert all_relevance_suggestions[1].suggested_status == SuggestedStatus.NOT_RELEVANT.value # Should be one agency all_agency_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) @@ -682,7 +721,7 @@ async def test_annotate_all_post_batch_filtering(api_test_helper): url_id=url_mapping_1.url_id, batch_id=setup_info_3.batch_id, all_annotations_post_info=AllAnnotationPostInfo( - is_relevant=True, + suggested_status=SuggestedStatus.RELEVANT, record_type=RecordType.ACCIDENT_REPORTS, agency=URLAgencyAnnotationPostInfo( is_new=True @@ -708,7 +747,7 @@ async def test_annotate_all_validation_error(api_test_helper): response = await ath.request_validator.post_all_annotations_and_get_next( url_id=url_mapping_1.url_id, all_annotations_post_info=AllAnnotationPostInfo( - is_relevant=False, + suggested_status=SuggestedStatus.NOT_RELEVANT, record_type=RecordType.ACCIDENT_REPORTS ) ) diff --git a/tests/test_automated/integration/api/test_metrics.py b/tests/test_automated/integration/api/test_metrics.py index b8eb6ca6..7d0fadfc 100644 --- a/tests/test_automated/integration/api/test_metrics.py +++ b/tests/test_automated/integration/api/test_metrics.py @@ -2,7 +2,7 @@ import pytest from collector_manager.enums import URLStatus, CollectorType -from core.enums import BatchStatus, RecordType +from core.enums import BatchStatus, RecordType, SuggestedStatus from tests.helpers.test_batch_creation_parameters import TestBatchCreationParameters, TestURLCreationParameters, \ AnnotationInfo @@ -26,7 +26,7 @@ async def test_get_batches_aggregated_metrics(api_test_helper): ), TestURLCreationParameters( count=3, - status=URLStatus.REJECTED + status=URLStatus.NOT_RELEVANT ), TestURLCreationParameters( count=4, @@ -106,7 +106,7 @@ async def test_get_batches_breakdown_metrics(api_test_helper): urls=[ TestURLCreationParameters( count=3, - status=URLStatus.REJECTED + status=URLStatus.NOT_RELEVANT ), TestURLCreationParameters( count=4, @@ -247,7 +247,7 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): count=1, status=URLStatus.PENDING, annotation_info=AnnotationInfo( - user_relevant=False + user_relevant=SuggestedStatus.NOT_RELEVANT ) ), TestURLCreationParameters( @@ -264,7 +264,7 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): count=3, status=URLStatus.PENDING, annotation_info=AnnotationInfo( - user_relevant=True, + user_relevant=SuggestedStatus.RELEVANT, user_record_type=RecordType.CALLS_FOR_SERVICE ) ) @@ -288,7 +288,7 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper): count=5, status=URLStatus.PENDING, annotation_info=AnnotationInfo( - user_relevant=True, + user_relevant=SuggestedStatus.RELEVANT, user_record_type=RecordType.INCARCERATION_RECORDS, user_agency=agency_id ) @@ -363,7 +363,7 @@ async def test_get_urls_aggregate_metrics(api_test_helper): ), TestURLCreationParameters( count=5, - status=URLStatus.REJECTED + status=URLStatus.NOT_RELEVANT ), ] ) @@ -401,7 +401,7 @@ async def test_get_backlog_metrics(api_test_helper): count=1, status=URLStatus.PENDING, annotation_info=AnnotationInfo( - user_relevant=False + user_relevant=SuggestedStatus.NOT_RELEVANT ) ), TestURLCreationParameters( @@ -427,7 +427,7 @@ async def test_get_backlog_metrics(api_test_helper): count=4, status=URLStatus.PENDING, annotation_info=AnnotationInfo( - user_relevant=False + user_relevant=SuggestedStatus.NOT_RELEVANT ) ), TestURLCreationParameters( @@ -453,7 +453,7 @@ async def test_get_backlog_metrics(api_test_helper): count=7, status=URLStatus.PENDING, annotation_info=AnnotationInfo( - user_relevant=False + user_relevant=SuggestedStatus.NOT_RELEVANT ) ), TestURLCreationParameters( diff --git a/tests/test_automated/integration/api/test_review.py b/tests/test_automated/integration/api/test_review.py index 1f427c61..facd6926 100644 --- a/tests/test_automated/integration/api/test_review.py +++ b/tests/test_automated/integration/api/test_review.py @@ -3,9 +3,10 @@ from collector_db.constants import PLACEHOLDER_AGENCY_NAME from collector_db.models import URL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Agency from collector_manager.enums import URLStatus -from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo +from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo, FinalReviewBaseInfo, RejectionReason, \ + FinalReviewRejectionInfo from core.DTOs.GetNextURLForFinalReviewResponse import GetNextURLForFinalReviewOuterResponse -from core.enums import RecordType +from core.enums import RecordType, SuggestedStatus from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_final_review @@ -46,7 +47,7 @@ async def test_review_next_source(api_test_helper): annotation_info = result.annotations relevant_info = annotation_info.relevant assert relevant_info.auto == True - assert relevant_info.user == False + assert relevant_info.user == SuggestedStatus.NOT_RELEVANT record_type_info = annotation_info.record_type assert record_type_info.auto == RecordType.ARREST_RECORDS @@ -132,8 +133,12 @@ async def test_approve_and_get_next_source_for_review(api_test_helper): if agency.agency_id == additional_agency: assert agency.name == PLACEHOLDER_AGENCY_NAME -@pytest.mark.asyncio -async def test_reject_and_get_next_source_for_review(api_test_helper): + +async def run_rejection_test( + api_test_helper, + rejection_reason: RejectionReason, + url_status: URLStatus +): ath = api_test_helper db_data_creator = ath.db_data_creator @@ -145,8 +150,9 @@ async def test_reject_and_get_next_source_for_review(api_test_helper): url_mapping = setup_info.url_mapping result: GetNextURLForFinalReviewOuterResponse = await ath.request_validator.reject_and_get_next_source_for_review( - review_info=FinalReviewBaseInfo( + review_info=FinalReviewRejectionInfo( url_id=url_mapping.url_id, + rejection_reason=rejection_reason ) ) @@ -154,8 +160,33 @@ async def test_reject_and_get_next_source_for_review(api_test_helper): adb_client = db_data_creator.adb_client # Confirm same agency id is listed as rejected - urls = await adb_client.get_all(URL) + urls: list[URL] = await adb_client.get_all(URL) assert len(urls) == 1 url = urls[0] assert url.id == url_mapping.url_id - assert url.outcome == URLStatus.REJECTED.value \ No newline at end of file + assert url.outcome == url_status.value + +@pytest.mark.asyncio +async def test_rejection_not_relevant(api_test_helper): + await run_rejection_test( + api_test_helper, + rejection_reason=RejectionReason.NOT_RELEVANT, + url_status=URLStatus.NOT_RELEVANT + ) + +@pytest.mark.asyncio +async def test_rejection_broken_page(api_test_helper): + await run_rejection_test( + api_test_helper, + rejection_reason=RejectionReason.BROKEN_PAGE_404, + url_status=URLStatus.NOT_FOUND + ) + +@pytest.mark.asyncio +async def test_rejection_individual_record(api_test_helper): + await run_rejection_test( + api_test_helper, + rejection_reason=RejectionReason.INDIVIDUAL_RECORD, + url_status=URLStatus.INDIVIDUAL_RECORD + ) + diff --git a/tests/test_automated/integration/collector_db/test_db_client.py b/tests/test_automated/integration/collector_db/test_db_client.py index 93edb3ed..e5454343 100644 --- a/tests/test_automated/integration/collector_db/test_db_client.py +++ b/tests/test_automated/integration/collector_db/test_db_client.py @@ -13,7 +13,7 @@ from collector_db.models import URL, ReviewingUserURL, URLOptionalDataSourceMetadata, ConfirmedURLAgency, Agency from collector_manager.enums import URLStatus from core.DTOs.FinalReviewApprovalInfo import FinalReviewApprovalInfo -from core.enums import BatchStatus, RecordType, SuggestionType +from core.enums import BatchStatus, RecordType, SuggestionType, SuggestedStatus from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_annotation, setup_for_annotate_agency from tests.helpers.DBDataCreator import DBDataCreator from tests.helpers.complex_test_data_functions import setup_for_get_next_url_for_final_review @@ -186,7 +186,7 @@ async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreato annotation_info = result.annotations relevant_info = annotation_info.relevant assert relevant_info.auto == True - assert relevant_info.user == False + assert relevant_info.user == SuggestedStatus.NOT_RELEVANT record_type_info = annotation_info.record_type assert record_type_info.auto == RecordType.ARREST_RECORDS @@ -465,7 +465,7 @@ async def test_get_next_url_for_user_relevance_annotation_pending( await adb_client.add_user_relevant_suggestion( url_id=url_1.url_info.url_id, user_id=1, - relevant=True + suggested_status=SuggestedStatus.RELEVANT ) url_3 = await adb_client.get_next_url_for_relevance_annotation( @@ -617,12 +617,12 @@ async def test_annotate_url_marked_not_relevant(db_data_creator: DBDataCreator): await adb_client.add_user_relevant_suggestion( user_id=1, url_id=url_to_mark_not_relevant.url_id, - relevant=False + suggested_status=SuggestedStatus.NOT_RELEVANT ) await adb_client.add_user_relevant_suggestion( user_id=1, url_id=url_to_mark_relevant.url_id, - relevant=True + suggested_status=SuggestedStatus.RELEVANT ) # User should not receive the URL for record type annotation diff --git a/tests/test_automated/unit/dto/test_all_annotation_post_info.py b/tests/test_automated/unit/dto/test_all_annotation_post_info.py index 3e5cbab4..1b35234a 100644 --- a/tests/test_automated/unit/dto/test_all_annotation_post_info.py +++ b/tests/test_automated/unit/dto/test_all_annotation_post_info.py @@ -1,8 +1,7 @@ import pytest -from pydantic import ValidationError from core.DTOs.AllAnnotationPostInfo import AllAnnotationPostInfo -from core.enums import RecordType +from core.enums import RecordType, SuggestedStatus from core.exceptions import FailedValidationException # Mock values to pass @@ -10,21 +9,21 @@ mock_agency = {"is_new": False, "suggested_agency": 1} # replace with a valid dict for the URLAgencyAnnotationPostInfo model @pytest.mark.parametrize( - "is_relevant, record_type, agency, should_raise", + "suggested_status, record_type, agency, should_raise", [ - (True, mock_record_type, mock_agency, False), # valid - (True, None, mock_agency, True), # missing record_type - (True, mock_record_type, None, True), # missing agency - (True, None, None, True), # missing both - (False, None, None, False), # valid - (False, mock_record_type, None, True), # record_type present - (False, None, mock_agency, True), # agency present - (False, mock_record_type, mock_agency, True), # both present + (SuggestedStatus.RELEVANT, mock_record_type, mock_agency, False), # valid + (SuggestedStatus.RELEVANT, None, mock_agency, True), # missing record_type + (SuggestedStatus.RELEVANT, mock_record_type, None, True), # missing agency + (SuggestedStatus.RELEVANT, None, None, True), # missing both + (SuggestedStatus.NOT_RELEVANT, None, None, False), # valid + (SuggestedStatus.NOT_RELEVANT, mock_record_type, None, True), # record_type present + (SuggestedStatus.NOT_RELEVANT, None, mock_agency, True), # agency present + (SuggestedStatus.NOT_RELEVANT, mock_record_type, mock_agency, True), # both present ] ) -def test_all_annotation_post_info_validation(is_relevant, record_type, agency, should_raise): +def test_all_annotation_post_info_validation(suggested_status, record_type, agency, should_raise): data = { - "is_relevant": is_relevant, + "suggested_status": suggested_status.value, "record_type": record_type, "agency": agency } @@ -34,4 +33,4 @@ def test_all_annotation_post_info_validation(is_relevant, record_type, agency, s AllAnnotationPostInfo(**data) else: model = AllAnnotationPostInfo(**data) - assert model.is_relevant == is_relevant + assert model.suggested_status == suggested_status