diff --git a/collector_db/AsyncDatabaseClient.py b/collector_db/AsyncDatabaseClient.py index c8b4a204..8ceda774 100644 --- a/collector_db/AsyncDatabaseClient.py +++ b/collector_db/AsyncDatabaseClient.py @@ -1065,19 +1065,6 @@ def count_subquery(model: Type[Base]): ) ) - count_subqueries = [ - count_subquery(model=model) - for model in models - ] - - sum_of_count_subqueries = ( - sum( - [ - coalesce(subquery.c.count, 0) - for subquery in count_subqueries - ] - ) - ) # Basic URL query url_query = ( @@ -1086,13 +1073,10 @@ def count_subquery(model: Type[Base]): ( sum_of_exist_subqueries ).label("total_distinct_annotation_count"), - ( - sum_of_count_subqueries - ).label("total_overall_annotation_count") ) ) - for subquery in (exist_subqueries + count_subqueries): + for subquery in exist_subqueries: url_query = url_query.outerjoin( subquery, URL.id == subquery.c.url_id ) @@ -1110,8 +1094,8 @@ def count_subquery(model: Type[Base]): URL.html_content, URL.auto_record_type_suggestion, URL.auto_relevant_suggestion, - URL.user_relevant_suggestions, - URL.user_record_type_suggestions, + URL.user_relevant_suggestion, + URL.user_record_type_suggestion, URL.optional_data_source_metadata, ] @@ -1122,7 +1106,7 @@ def count_subquery(model: Type[Base]): # The below relationships are joined to entities that are joined to the URL double_join_relationships = [ (URL.automated_agency_suggestions, AutomatedUrlAgencySuggestion.agency), - (URL.user_agency_suggestions, UserUrlAgencySuggestion.agency), + (URL.user_agency_suggestion, UserUrlAgencySuggestion.agency), (URL.confirmed_agencies, ConfirmedURLAgency.agency) ] for primary, secondary in double_join_relationships: @@ -1134,7 +1118,6 @@ def count_subquery(model: Type[Base]): # Apply order clause url_query = url_query.order_by( desc("total_distinct_annotation_count"), - desc("total_overall_annotation_count"), asc(URL.id) ) @@ -1173,16 +1156,16 @@ def count_subquery(model: Type[Base]): description=result.description, 
annotations=FinalReviewAnnotationInfo( relevant=DTOConverter.final_review_annotation_relevant_info( - user_suggestions=result.user_relevant_suggestions, + user_suggestion=result.user_relevant_suggestion, auto_suggestion=result.auto_relevant_suggestion ), record_type=DTOConverter.final_review_annotation_record_type_info( - user_suggestions=result.user_record_type_suggestions, + user_suggestion=result.user_record_type_suggestion, auto_suggestion=result.auto_record_type_suggestion ), agency=DTOConverter.final_review_annotation_agency_info( automated_agency_suggestions=result.automated_agency_suggestions, - user_agency_suggestions=result.user_agency_suggestions, + user_agency_suggestion=result.user_agency_suggestion, confirmed_agencies=result.confirmed_agencies ) ), diff --git a/collector_db/DTOConverter.py b/collector_db/DTOConverter.py index 0d2856cf..2b6cf521 100644 --- a/collector_db/DTOConverter.py +++ b/collector_db/DTOConverter.py @@ -2,16 +2,15 @@ from collector_db.DTOs.URLHTMLContentInfo import HTMLContentType, URLHTMLContentInfo from collector_db.DTOs.URLWithHTML import URLWithHTML -from collector_db.enums import ValidationStatus, ValidationSource, URLMetadataAttributeType from collector_db.models import AutomatedUrlAgencySuggestion, UserUrlAgencySuggestion, URLHTMLContent, URL, Agency, \ AutoRecordTypeSuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion, AutoRelevantSuggestion, \ ConfirmedURLAgency from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo from core.DTOs.GetNextURLForFinalReviewResponse import FinalReviewAnnotationRelevantInfo, \ - FinalReviewAnnotationRelevantUsersInfo, FinalReviewAnnotationRecordTypeInfo, FinalReviewAnnotationAgencyAutoInfo, \ - FinalReviewAnnotationAgencyInfo, FinalReviewAnnotationAgencyUserInfo + FinalReviewAnnotationRecordTypeInfo, FinalReviewAnnotationAgencyAutoInfo, \ + FinalReviewAnnotationAgencyInfo from core.enums import RecordType, SuggestionType -from 
html_tag_collector.DataClassTags import convert_to_response_html_info, ResponseHTMLInfo, ENUM_TO_ATTRIBUTE_MAPPING +from html_tag_collector.DataClassTags import ResponseHTMLInfo, ENUM_TO_ATTRIBUTE_MAPPING class DTOConverter: @@ -21,49 +20,35 @@ class DTOConverter: @staticmethod def final_review_annotation_relevant_info( - user_suggestions: list[UserRelevantSuggestion], + user_suggestion: UserRelevantSuggestion, auto_suggestion: AutoRelevantSuggestion ) -> FinalReviewAnnotationRelevantInfo: auto_value = auto_suggestion.relevant if auto_suggestion else None - - relevant_count = 0 - not_relevant_count = 0 - for suggestion in user_suggestions: - if suggestion.relevant: - relevant_count += 1 - else: - not_relevant_count += 1 + user_value = user_suggestion.relevant if user_suggestion else None return FinalReviewAnnotationRelevantInfo( auto=auto_value, - users=FinalReviewAnnotationRelevantUsersInfo( - relevant=relevant_count, - not_relevant=not_relevant_count - ) + user=user_value ) @staticmethod def final_review_annotation_record_type_info( - user_suggestions: list[UserRecordTypeSuggestion], + user_suggestion: UserRecordTypeSuggestion, auto_suggestion: AutoRecordTypeSuggestion ): - user_count = {} if auto_suggestion is None: auto_value = None else: auto_value = RecordType(auto_suggestion.record_type) - for suggestion in user_suggestions: - value = RecordType(suggestion.record_type) - if value not in user_count: - user_count[value] = 0 - user_count[value] += 1 - # Sort users by count, descending - user_count = dict(sorted(user_count.items(), key=lambda x: x[1], reverse=True)) + if user_suggestion is None: + user_value = None + else: + user_value = RecordType(user_suggestion.record_type) return FinalReviewAnnotationRecordTypeInfo( auto=auto_value, - users=user_count + user=user_value ) @staticmethod @@ -109,27 +94,20 @@ def final_review_annotation_agency_auto_info( @staticmethod def user_url_agency_suggestion_to_final_review_annotation_agency_user_info( - 
user_url_agency_suggestions: list[UserUrlAgencySuggestion] - ) -> dict[int, FinalReviewAnnotationAgencyUserInfo]: - d = {} - for suggestion in user_url_agency_suggestions: - agency = suggestion.agency - agency_id = agency.agency_id - if agency.agency_id not in d: - d[agency_id] = FinalReviewAnnotationAgencyUserInfo( - suggestion_type=SuggestionType.MANUAL_SUGGESTION, - agency_name=agency.name, - pdap_agency_id=agency_id, - state=agency.state, - county=agency.county, - locality=agency.locality, - count=1 - ) - else: - d[agency_id].count += 1 + user_url_agency_suggestion: UserUrlAgencySuggestion + ) -> Optional[GetNextURLForAgencyAgencyInfo]: + suggestion = user_url_agency_suggestion + if suggestion is None: + return None + return GetNextURLForAgencyAgencyInfo( + suggestion_type=SuggestionType.MANUAL_SUGGESTION, + pdap_agency_id=suggestion.agency_id, + agency_name=suggestion.agency.name, + state=suggestion.agency.state, + county=suggestion.agency.county, + locality=suggestion.agency.locality + ) - # Return sorted - return dict(sorted(d.items(), key=lambda x: x[1].count, reverse=True)) @staticmethod def confirmed_agencies_to_final_review_annotation_agency_info( @@ -154,7 +132,7 @@ def confirmed_agencies_to_final_review_annotation_agency_info( def final_review_annotation_agency_info( automated_agency_suggestions: list[AutomatedUrlAgencySuggestion], confirmed_agencies: list[ConfirmedURLAgency], - user_agency_suggestions: list[UserUrlAgencySuggestion] + user_agency_suggestion: UserUrlAgencySuggestion ): confirmed_agency_info = DTOConverter.confirmed_agencies_to_final_review_annotation_agency_info( @@ -166,12 +144,12 @@ def final_review_annotation_agency_info( ) agency_user_info = DTOConverter.user_url_agency_suggestion_to_final_review_annotation_agency_user_info( - user_agency_suggestions + user_agency_suggestion ) return FinalReviewAnnotationAgencyInfo( confirmed=confirmed_agency_info, - users=agency_user_info, + user=agency_user_info, auto=agency_auto_info ) diff --git 
a/collector_db/models.py b/collector_db/models.py index e98ef437..4ac117d6 100644 --- a/collector_db/models.py +++ b/collector_db/models.py @@ -119,16 +119,16 @@ class URL(Base): ) automated_agency_suggestions = relationship( "AutomatedUrlAgencySuggestion", back_populates="url") - user_agency_suggestions = relationship( - "UserUrlAgencySuggestion", back_populates="url") + user_agency_suggestion = relationship( + "UserUrlAgencySuggestion", uselist=False, back_populates="url") auto_record_type_suggestion = relationship( "AutoRecordTypeSuggestion", uselist=False, back_populates="url") - user_record_type_suggestions = relationship( - "UserRecordTypeSuggestion", back_populates="url") + user_record_type_suggestion = relationship( + "UserRecordTypeSuggestion", uselist=False, back_populates="url") auto_relevant_suggestion = relationship( "AutoRelevantSuggestion", uselist=False, back_populates="url") - user_relevant_suggestions = relationship( - "UserRelevantSuggestion", back_populates="url") + user_relevant_suggestion = relationship( + "UserRelevantSuggestion", uselist=False, back_populates="url") reviewing_user = relationship( "ReviewingUserURL", uselist=False, back_populates="url") optional_data_source_metadata = relationship( @@ -375,7 +375,7 @@ class UserUrlAgencySuggestion(Base): is_new = Column(Boolean, nullable=True) agency = relationship("Agency", back_populates="user_suggestions") - url = relationship("URL", back_populates="user_agency_suggestions") + url = relationship("URL", back_populates="user_agency_suggestion") __table_args__ = ( UniqueConstraint("agency_id", "url_id", "user_id", name="uq_user_url_agency_suggestions"), @@ -432,7 +432,7 @@ class UserRelevantSuggestion(Base): # Relationships - url = relationship("URL", back_populates="user_relevant_suggestions") + url = relationship("URL", back_populates="user_relevant_suggestion") class UserRecordTypeSuggestion(Base): @@ -451,4 +451,4 @@ class UserRecordTypeSuggestion(Base): # Relationships - url = 
relationship("URL", back_populates="user_record_type_suggestions") \ No newline at end of file + url = relationship("URL", back_populates="user_record_type_suggestion") \ No newline at end of file diff --git a/core/DTOs/GetNextURLForFinalReviewResponse.py b/core/DTOs/GetNextURLForFinalReviewResponse.py index f7f44e32..c9e838b6 100644 --- a/core/DTOs/GetNextURLForFinalReviewResponse.py +++ b/core/DTOs/GetNextURLForFinalReviewResponse.py @@ -6,26 +6,21 @@ from core.enums import RecordType from html_tag_collector.DataClassTags import ResponseHTMLInfo - -class FinalReviewAnnotationRelevantUsersInfo(BaseModel): - relevant: int = Field(title="Number of users who marked the URL as relevant") - not_relevant: int = Field(title="Number of users who marked the URL as not relevant") - class FinalReviewAnnotationRelevantInfo(BaseModel): auto: Optional[bool] = Field(title="Whether the auto-labeler has marked the URL as relevant") - users: FinalReviewAnnotationRelevantUsersInfo = Field( - title="How users identified the relevancy of the source", + user: Optional[bool] = Field( + title="Whether a user has marked the URL as relevant", ) class FinalReviewAnnotationRecordTypeInfo(BaseModel): - auto: Optional[RecordType] = Field(title="The record type suggested by the auto-labeler") - users: dict[RecordType, int] = Field( - title="A dictionary, sorted by size and omitting zero values, of all record types suggested by users", + auto: Optional[RecordType] = Field( + title="The record type suggested by the auto-labeler" + ) + user: Optional[RecordType] = Field( + title="The record type suggested by a user", ) # region Agency -class FinalReviewAnnotationAgencyUserInfo(GetNextURLForAgencyAgencyInfo): - count: int = Field(title="Number of times suggested by users") class FinalReviewAnnotationAgencyAutoInfo(BaseModel): unknown: bool = Field(title="Whether the auto-labeler suggested the URL as unknown") @@ -39,8 +34,8 @@ class FinalReviewAnnotationAgencyInfo(BaseModel): ) auto: 
Optional[FinalReviewAnnotationAgencyAutoInfo] = Field( title="A single agency or a list of agencies suggested by the auto-labeler",) - users: Optional[dict[int, FinalReviewAnnotationAgencyUserInfo]] = Field( - title="A list, sorted by size, of all agencies suggested by users", + user: Optional[GetNextURLForAgencyAgencyInfo] = Field( + title="A single agency suggested by a user", ) # endregion diff --git a/tests/helpers/complex_test_data_functions.py b/tests/helpers/complex_test_data_functions.py index 955e1cf6..6f9ca7c3 100644 --- a/tests/helpers/complex_test_data_functions.py +++ b/tests/helpers/complex_test_data_functions.py @@ -56,6 +56,7 @@ async def setup_for_annotate_agency( class FinalReviewSetupInfo(BaseModel): batch_id: int url_mapping: URLMapping + user_agency_id: Optional[int] async def setup_for_get_next_url_for_final_review( db_data_creator: DBDataCreator, @@ -78,27 +79,25 @@ async def setup_for_get_next_url_for_final_review( await db_data_creator.url_miscellaneous_metadata(url_id=url_mapping.url_id) await db_data_creator.html_data([url_mapping.url_id]) - async def add_agency_suggestion(count: int): + async def add_agency_suggestion() -> int: agency_id = await db_data_creator.agency() - for i in range(count): - await db_data_creator.agency_user_suggestions( - url_id=url_mapping.url_id, - agency_id=agency_id - ) - - async def add_record_type_suggestion(count: int, record_type: RecordType): - for i in range(count): - await db_data_creator.user_record_type_suggestion( - url_id=url_mapping.url_id, - record_type=record_type - ) - - async def add_relevant_suggestion(count: int, relevant: bool): - for i in range(count): - await db_data_creator.user_relevant_suggestion( - url_id=url_mapping.url_id, - relevant=relevant - ) + await db_data_creator.agency_user_suggestions( + url_id=url_mapping.url_id, + agency_id=agency_id + ) + return agency_id + + async def add_record_type_suggestion(record_type: RecordType): + await db_data_creator.user_record_type_suggestion( 
+ url_id=url_mapping.url_id, + record_type=record_type + ) + + async def add_relevant_suggestion(relevant: bool): + await db_data_creator.user_relevant_suggestion( + url_id=url_mapping.url_id, + relevant=relevant + ) await db_data_creator.auto_relevant_suggestions( url_id=url_mapping.url_id, @@ -111,11 +110,14 @@ async def add_relevant_suggestion(count: int, relevant: bool): ) if include_user_annotations: - await add_relevant_suggestion(1, False) - await add_record_type_suggestion(1, RecordType.ACCIDENT_REPORTS) - await add_agency_suggestion(1) + await add_relevant_suggestion(False) + await add_record_type_suggestion(RecordType.ACCIDENT_REPORTS) + user_agency_id = await add_agency_suggestion() + else: + user_agency_id = None return FinalReviewSetupInfo( batch_id=batch_id, - url_mapping=url_mapping + url_mapping=url_mapping, + user_agency_id=user_agency_id ) diff --git a/tests/test_automated/integration/api/test_review.py b/tests/test_automated/integration/api/test_review.py index 494765b6..1f427c61 100644 --- a/tests/test_automated/integration/api/test_review.py +++ b/tests/test_automated/integration/api/test_review.py @@ -46,14 +46,11 @@ async def test_review_next_source(api_test_helper): annotation_info = result.annotations relevant_info = annotation_info.relevant assert relevant_info.auto == True - assert relevant_info.users.not_relevant == 1 + assert relevant_info.user == False record_type_info = annotation_info.record_type assert record_type_info.auto == RecordType.ARREST_RECORDS - user_d = record_type_info.users - assert user_d[RecordType.ACCIDENT_REPORTS] == 1 - assert list(user_d.keys()) == [RecordType.ACCIDENT_REPORTS] - + assert record_type_info.user == RecordType.ACCIDENT_REPORTS agency_info = annotation_info.agency auto_agency_suggestions = agency_info.auto @@ -61,9 +58,9 @@ async def test_review_next_source(api_test_helper): assert len(auto_agency_suggestions.suggestions) == 3 - # Check user agency suggestions exist and in descending order of count + # Check user agency suggestion exists and is correct - 
user_agency_suggestions = agency_info.users - user_agency_suggestions_as_list = list(user_agency_suggestions.values()) - assert len(user_agency_suggestions_as_list) == 1 + user_agency_suggestion = agency_info.user + assert user_agency_suggestion.pdap_agency_id == setup_info.user_agency_id + # Check confirmed agencies exist confirmed_agencies = agency_info.confirmed @@ -78,13 +75,12 @@ async def test_approve_and_get_next_source_for_review(api_test_helper): setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=db_data_creator, - annotation_count=3, include_user_annotations=True ) url_mapping = setup_info.url_mapping # Add confirmed agency - confirmed_agency = await db_data_creator.confirmed_suggestions([url_mapping.url_id]) + await db_data_creator.confirmed_suggestions([url_mapping.url_id]) # Additionally, include an agency not yet included in the database additional_agency = 999999 diff --git a/tests/test_automated/integration/collector_db/test_db_client.py b/tests/test_automated/integration/collector_db/test_db_client.py index 71bed7b4..5ea0bee2 100644 --- a/tests/test_automated/integration/collector_db/test_db_client.py +++ b/tests/test_automated/integration/collector_db/test_db_client.py @@ -186,24 +186,20 @@ async def test_get_next_url_for_final_review_basic(db_data_creator: DBDataCreato annotation_info = result.annotations relevant_info = annotation_info.relevant assert relevant_info.auto == True - assert relevant_info.users.not_relevant == 1 + assert relevant_info.user == False record_type_info = annotation_info.record_type assert record_type_info.auto == RecordType.ARREST_RECORDS - user_d = record_type_info.users - assert user_d[RecordType.ACCIDENT_REPORTS] == 1 - assert list(user_d.keys()) == [RecordType.ACCIDENT_REPORTS] - + assert record_type_info.user == RecordType.ACCIDENT_REPORTS agency_info = annotation_info.agency auto_agency_suggestions = agency_info.auto assert auto_agency_suggestions.unknown == False assert 
len(auto_agency_suggestions.suggestions) == 3 - # Check user agency suggestions exist and in descending order of count - user_agency_suggestions = agency_info.users - user_agency_suggestions_as_list = list(user_agency_suggestions.values()) - assert len(user_agency_suggestions_as_list) == 1 + # Check user agency suggestion exists and is correct + assert agency_info.user.pdap_agency_id == setup_info.user_agency_id + @pytest.mark.asyncio async def test_get_next_url_for_final_review_batch_id_filtering(db_data_creator: DBDataCreator): @@ -301,12 +297,11 @@ async def test_get_next_url_for_final_review_no_annotations(db_data_creator: DBD record_type = annotations.record_type assert record_type.auto is None - assert record_type.users == {} + assert record_type.user is None relevant = annotations.relevant assert relevant.auto is None - assert relevant.users.relevant == 0 - assert relevant.users.not_relevant == 0 + assert relevant.user is None @pytest.mark.asyncio async def test_get_next_url_for_final_review_only_confirmed_urls(db_data_creator: DBDataCreator):