diff --git a/src/api/endpoints/annotate/all/get/models/record_type.py b/src/api/endpoints/annotate/all/get/models/record_type.py new file mode 100644 index 00000000..a1c24911 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/models/record_type.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + +from src.core.enums import RecordType + + + +class RecordTypeAnnotationSuggestion(BaseModel): + record_type: RecordType + endorsement_count: int + + diff --git a/src/api/endpoints/annotate/all/get/models/response.py b/src/api/endpoints/annotate/all/get/models/response.py index ac444e5a..3f280465 100644 --- a/src/api/endpoints/annotate/all/get/models/response.py +++ b/src/api/endpoints/annotate/all/get/models/response.py @@ -5,6 +5,8 @@ from src.api.endpoints.annotate.agency.get.dto import GetNextURLForAgencyAgencyInfo from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion +from src.api.endpoints.annotate.all.get.models.record_type import RecordTypeAnnotationSuggestion +from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion from src.api.endpoints.annotate.dtos.shared.base.response import AnnotationInnerResponseInfoBase from src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo from src.core.enums import RecordType @@ -17,11 +19,11 @@ class GetNextURLForAllAnnotationInnerResponse(AnnotationInnerResponseInfoBase): location_suggestions: LocationAnnotationResponseOuterInfo | None = Field( title="User and Auto-Suggestions for locations" ) - suggested_relevant: RelevanceAnnotationResponseInfo | None = Field( + url_type_suggestions: list[URLTypeAnnotationSuggestion] = Field( title="Whether the auto-labeler identified the URL as relevant or not" ) - suggested_record_type: RecordType | None = Field( - title="What record type, if any, the auto-labeler identified the URL as" + 
record_type_suggestions: list[RecordTypeAnnotationSuggestion] = Field( + title="What record type, if any, user and the auto-labeler identified the URL as" ) name_suggestions: list[NameAnnotationSuggestion] | None = Field( title="User and Auto-Suggestions for names" diff --git a/src/api/endpoints/annotate/all/get/models/url_type.py b/src/api/endpoints/annotate/all/get/models/url_type.py new file mode 100644 index 00000000..cbc947e6 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/models/url_type.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + +from src.db.models.impl.flag.url_validated.enums import URLType + + +class URLTypeAnnotationSuggestion(BaseModel): + url_type: URLType + endorsement_count: int diff --git a/src/api/endpoints/annotate/all/get/queries/convert.py b/src/api/endpoints/annotate/all/get/queries/convert.py new file mode 100644 index 00000000..535a7d15 --- /dev/null +++ b/src/api/endpoints/annotate/all/get/queries/convert.py @@ -0,0 +1,43 @@ +from collections import Counter + +from src.api.endpoints.annotate.all.get.models.record_type import RecordTypeAnnotationSuggestion +from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion + + +def convert_user_url_type_suggestion_to_url_type_annotation_suggestion( + db_suggestions: list[UserURLTypeSuggestion] +) -> list[URLTypeAnnotationSuggestion]: + counter: Counter[URLType] = Counter() + for suggestion in db_suggestions: + counter[suggestion.type] += 1 + anno_suggestions: list[URLTypeAnnotationSuggestion] = [] + for url_type, endorsement_count in counter.most_common(3): + anno_suggestions.append( + URLTypeAnnotationSuggestion( + url_type=url_type, + endorsement_count=endorsement_count, + ) + ) 
+ return anno_suggestions + +def convert_user_record_type_suggestion_to_record_type_annotation_suggestion( + db_suggestions: list[UserRecordTypeSuggestion] +) -> list[RecordTypeAnnotationSuggestion]: + counter: Counter[RecordType] = Counter() + for suggestion in db_suggestions: + counter[suggestion.record_type] += 1 + + anno_suggestions: list[RecordTypeAnnotationSuggestion] = [] + for record_type, endorsement_count in counter.most_common(3): + anno_suggestions.append( + RecordTypeAnnotationSuggestion( + record_type=record_type, + endorsement_count=endorsement_count, + ) + ) + + return anno_suggestions \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index da859135..cad49b90 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -7,8 +7,13 @@ from src.api.endpoints.annotate.agency.get.queries.agency_suggestion_.core import GetAgencySuggestionsQueryBuilder from src.api.endpoints.annotate.all.get.models.location import LocationAnnotationResponseOuterInfo from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion +from src.api.endpoints.annotate.all.get.models.record_type import RecordTypeAnnotationSuggestion from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse, \ GetNextURLForAllAnnotationInnerResponse +from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion +from src.api.endpoints.annotate.all.get.queries.convert import \ + convert_user_url_type_suggestion_to_url_type_annotation_suggestion, \ + convert_user_record_type_suggestion_to_record_type_annotation_suggestion from src.api.endpoints.annotate.all.get.queries.location_.core import GetLocationSuggestionsQueryBuilder from src.api.endpoints.annotate.all.get.queries.name.core import GetNameSuggestionsQueryBuilder from 
src.api.endpoints.annotate.relevance.get.dto import RelevanceAnnotationResponseInfo @@ -104,8 +109,8 @@ async def run( # Add load options query = query.options( joinedload(URL.html_content), - joinedload(URL.auto_relevant_suggestion), - joinedload(URL.auto_record_type_suggestion), + joinedload(URL.user_relevant_suggestions), + joinedload(URL.user_record_type_suggestions), joinedload(URL.name_suggestions), ) @@ -124,14 +129,14 @@ async def run( url.html_content ) - auto_relevant: AutoRelevantSuggestion | None = None - if url.auto_relevant_suggestion is not None: - auto_relevant = url.auto_relevant_suggestion - - auto_record_type: AutoRecordTypeSuggestion | None = None - if url.auto_record_type_suggestion is not None: - auto_record_type = url.auto_record_type_suggestion.record_type - + url_type_suggestions: list[URLTypeAnnotationSuggestion] = \ + convert_user_url_type_suggestion_to_url_type_annotation_suggestion( + url.user_relevant_suggestions + ) + record_type_suggestions: list[RecordTypeAnnotationSuggestion] = \ + convert_user_record_type_suggestion_to_record_type_annotation_suggestion( + url.user_record_type_suggestions + ) agency_suggestions: list[GetNextURLForAgencyAgencyInfo] = \ await GetAgencySuggestionsQueryBuilder(url_id=url.id).run(session) location_suggestions: LocationAnnotationResponseOuterInfo = \ @@ -139,6 +144,7 @@ async def run( name_suggestions: list[NameAnnotationSuggestion] = \ await GetNameSuggestionsQueryBuilder(url_id=url.id).run(session) + return GetNextURLForAllAnnotationResponse( next_annotation=GetNextURLForAllAnnotationInnerResponse( url_info=URLMapping( @@ -146,12 +152,8 @@ async def run( url=url.url ), html_info=html_response_info, - suggested_relevant=RelevanceAnnotationResponseInfo( - is_relevant=auto_relevant.relevant, - confidence=auto_relevant.confidence, - model_name=auto_relevant.model_name - ) if auto_relevant is not None else None, - suggested_record_type=auto_record_type, + url_type_suggestions=url_type_suggestions, + 
record_type_suggestions=record_type_suggestions, agency_suggestions=agency_suggestions, batch_info=await GetAnnotationBatchInfoQueryBuilder( batch_id=self.batch_id, diff --git a/src/api/endpoints/search/agency/query.py b/src/api/endpoints/search/agency/query.py index 6048468a..28e045be 100644 --- a/src/api/endpoints/search/agency/query.py +++ b/src/api/endpoints/search/agency/query.py @@ -59,6 +59,8 @@ async def run(self, session: AsyncSession) -> list[AgencySearchResponse]: ).desc() ) + query = query.limit(50) + mappings: Sequence[RowMapping] = await sh.mappings(session, query) return [ diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index 4ecb9935..c7e1c5b5 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -33,11 +33,11 @@ async def test_annotate_all( # Set up URLs setup_info_1 = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, include_user_annotations=False + db_data_creator=ath.db_data_creator, include_user_annotations=True ) url_mapping_1 = setup_info_1.url_mapping setup_info_2 = await setup_for_get_next_url_for_final_review( - db_data_creator=ath.db_data_creator, include_user_annotations=False + db_data_creator=ath.db_data_creator, include_user_annotations=True ) url_mapping_2 = setup_info_2.url_mapping @@ -99,22 +99,25 @@ async def test_annotate_all( # Check that all annotations are present in the database - # Should be two relevance annotations, one True and one False + # Check URL Type Suggestions all_relevance_suggestions: list[UserURLTypeSuggestion] = await adb_client.get_all(UserURLTypeSuggestion) - assert len(all_relevance_suggestions) == 2 - assert all_relevance_suggestions[0].type == URLType.DATA_SOURCE - assert all_relevance_suggestions[1].type == URLType.NOT_RELEVANT + assert len(all_relevance_suggestions) == 4 + 
suggested_types: set[URLType] = {sugg.type for sugg in all_relevance_suggestions} + assert suggested_types == {URLType.DATA_SOURCE, URLType.NOT_RELEVANT} - # Should be one agency + # Should be three agency suggestions, including one for agency_id all_agency_suggestions = await adb_client.get_all(UserUrlAgencySuggestion) - assert len(all_agency_suggestions) == 1 - assert all_agency_suggestions[0].is_new is None - assert all_agency_suggestions[0].agency_id == agency_id + assert len(all_agency_suggestions) == 3 + suggested_agency_ids: set[int] = {sugg.agency_id for sugg in all_agency_suggestions} + assert agency_id in suggested_agency_ids - # Should be one record type + # Should be three record type suggestions, including ACCIDENT_REPORTS all_record_type_suggestions = await adb_client.get_all(UserRecordTypeSuggestion) - assert len(all_record_type_suggestions) == 1 - assert all_record_type_suggestions[0].record_type == RecordType.ACCIDENT_REPORTS.value + assert len(all_record_type_suggestions) == 3 + suggested_record_types: set[RecordType] = { + sugg.record_type for sugg in all_record_type_suggestions + } + assert RecordType.ACCIDENT_REPORTS.value in suggested_record_types # Confirm 3 Location Suggestions, with two belonging to California and one to Pennsylvania all_location_suggestions = await adb_client.get_all(UserLocationSuggestion)