From 749c2b5089a0231a43b2622cd97701048e560da7 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 18 Dec 2025 09:38:22 -0500 Subject: [PATCH 1/2] Privilege manually-submitted URLs for annotation sorting --- .../annotate/_shared/queries/helper.py | 57 +++++++++++++++++++ .../annotate/all/get/queries/core.py | 41 +++---------- .../endpoints/annotate/anonymous/get/query.py | 30 ++-------- src/api/endpoints/contributions/routes.py | 2 +- .../api/annotate/all/test_sorting.py | 48 ++++++++++++++++ .../data_creator/commands/impl/urls_/query.py | 6 +- tests/helpers/data_creator/core.py | 2 + tests/helpers/setup/final_review/core.py | 7 ++- 8 files changed, 131 insertions(+), 62 deletions(-) create mode 100644 src/api/endpoints/annotate/_shared/queries/helper.py create mode 100644 tests/automated/integration/api/annotate/all/test_sorting.py diff --git a/src/api/endpoints/annotate/_shared/queries/helper.py b/src/api/endpoints/annotate/_shared/queries/helper.py new file mode 100644 index 00000000..f5bf55eb --- /dev/null +++ b/src/api/endpoints/annotate/_shared/queries/helper.py @@ -0,0 +1,57 @@ +""" +This module contains helper functions for the annotate GET queries +""" + +from sqlalchemy import Select, case +from sqlalchemy.orm import joinedload + +from src.db.models.impl.url.core.enums import URLSource +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.views.unvalidated_url import UnvalidatedURL +from src.db.models.views.url_anno_count import URLAnnotationCount +from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView + + +def get_select() -> Select: + return ( + Select(URL) + # URL Must be unvalidated + .join( + UnvalidatedURL, + UnvalidatedURL.url_id == URL.id + ) + .join( + URLAnnotationFlagsView, + URLAnnotationFlagsView.url_id == URL.id + ) + .join( + URLAnnotationCount, + URLAnnotationCount.url_id == URL.id + ) + ) + +def conclude(query: Select) -> Select: + query = ( + # Add load options + query.options( + joinedload(URL.html_content), + joinedload(URL.user_relevant_suggestions), + joinedload(URL.user_record_type_suggestions), + joinedload(URL.name_suggestions), + ) + # Sorting Priority + .order_by( + # Privilege manually submitted URLs first + case( + (URL.source == URLSource.MANUAL, 0), + else_=1 + ).asc(), + # Break ties by favoring URL with higher total annotations + URLAnnotationCount.total_anno_count.desc(), + # Break additional ties by favoring least recently created URLs + URL.id.asc() + ) + # Limit to 1 result + .limit(1) + ) + return query \ No newline at end of file diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index 89975a08..c63e8489 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -1,8 +1,8 @@ -from sqlalchemy import Select, exists, select +from sqlalchemy import exists, select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import joinedload from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result +from src.api.endpoints.annotate._shared.queries import helper from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended @@ -12,9 +12,6 @@ from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion -from src.db.models.views.unvalidated_url import UnvalidatedURL -from src.db.models.views.url_anno_count import URLAnnotationCount -from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import QueryBuilderBase @@ -35,22 +32,9 @@ async def run( self, session: AsyncSession ) -> GetNextURLForAllAnnotationResponse: - query = ( - Select(URL) - # URL Must be unvalidated - .join( - UnvalidatedURL, - UnvalidatedURL.url_id == URL.id - ) - .join( - URLAnnotationFlagsView, - URLAnnotationFlagsView.url_id == URL.id - ) - .join( - URLAnnotationCount, - URLAnnotationCount.url_id == URL.id - ) - ) + query = helper.get_select() + + # Add user annotation-specific joins and conditions if self.batch_id is not None: query = query.join(LinkBatchURL).where(LinkBatchURL.batch_id == self.batch_id) if self.url_id is not None: @@ -102,18 +86,11 @@ async def run( ) ) ) - # Add load options - query = query.options( - joinedload(URL.html_content), - joinedload(URL.user_relevant_suggestions), - joinedload(URL.user_record_type_suggestions), - joinedload(URL.name_suggestions), - ) - query = query.order_by( - URLAnnotationCount.total_anno_count.desc(), - URL.id.asc() - ).limit(1) + + # Conclude query with limit and sorting + query = helper.conclude(query) + raw_results = (await session.execute(query)).unique() url: URL | None = raw_results.scalars().one_or_none() if url is None: diff --git a/src/api/endpoints/annotate/anonymous/get/query.py b/src/api/endpoints/annotate/anonymous/get/query.py index 041d5cda..ffe0a7b0 100644 --- a/src/api/endpoints/annotate/anonymous/get/query.py +++ b/src/api/endpoints/annotate/anonymous/get/query.py @@ -21,6 +21,7 @@ from src.db.models.views.url_anno_count import URLAnnotationCount from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView from src.db.queries.base.builder import QueryBuilderBase +from src.api.endpoints.annotate._shared.queries import helper class GetNextURLForAnonymousAnnotationQueryBuilder(QueryBuilderBase): @@ -33,22 +34,11 @@ def __init__( self.session_id = session_id async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationResponse: + query = helper.get_select() + # Add anonymous annotation-specific conditions. query = ( - Select(URL) - # URL Must be unvalidated - .join( - UnvalidatedURL, - UnvalidatedURL.url_id == URL.id - ) - .join( - URLAnnotationFlagsView, - URLAnnotationFlagsView.url_id == URL.id - ) - .join( - URLAnnotationCount, - URLAnnotationCount.url_id == URL.id - ) + query .where( URL.status == URLStatus.OK.value, # Must not have been previously annotated by user @@ -77,18 +67,8 @@ async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationRe ) ) ) - .options( - joinedload(URL.html_content), - joinedload(URL.user_relevant_suggestions), - joinedload(URL.user_record_type_suggestions), - joinedload(URL.name_suggestions), - ) - .order_by( - URLAnnotationCount.total_anno_count.desc(), - URL.id.asc() - ) - .limit(1) ) + query = helper.conclude(query) raw_results = (await session.execute(query)).unique() url: URL | None = raw_results.scalars().one_or_none() diff --git a/src/api/endpoints/contributions/routes.py b/src/api/endpoints/contributions/routes.py index 457ece29..a5032708 100644 --- a/src/api/endpoints/contributions/routes.py +++ b/src/api/endpoints/contributions/routes.py @@ -7,7 +7,7 @@ from src.api.endpoints.contributions.user.response import ContributionsUserResponse from src.core.core import AsyncCore from src.security.dtos.access_info import AccessInfo -from src.security.manager import get_access_info, get_standard_user_access_info +from src.security.manager import get_standard_user_access_info contributions_router = APIRouter( prefix="/contributions", diff --git a/tests/automated/integration/api/annotate/all/test_sorting.py b/tests/automated/integration/api/annotate/all/test_sorting.py new file mode 100644 index 00000000..a1c59813 --- /dev/null +++ b/tests/automated/integration/api/annotate/all/test_sorting.py @@ -0,0 +1,48 @@ +import pytest + +from src.db.models.impl.url.core.enums import URLSource +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review +from tests.helpers.setup.final_review.model import FinalReviewSetupInfo + + +@pytest.mark.asyncio +async def test_annotate_sorting( + api_test_helper: APITestHelper, + +): + """ + Test that annotations are prioritized in the following order: + - Any manual submissions are prioritized first + - Then prioritize by number of annotations descending + - Then prioritize by URL ID ascending (e.g. least recently created) + """ + ath = api_test_helper + + # First URL created should be prioritized in absence of any other factors + setup_info_first_annotation: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=False + ) + get_response_1 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_1.next_annotation is not None + assert get_response_1.next_annotation.url_info.url_id == setup_info_first_annotation.url_mapping.url_id + + # ...But higher annotation count should take precedence over least recently created + setup_info_high_annotations: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + include_user_annotations=True + ) + get_response_2 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_2.next_annotation is not None + assert get_response_2.next_annotation.url_info.url_id == setup_info_high_annotations.url_mapping.url_id + + # ...But manual submissions should take precedence over higher annotation count + setup_info_manual_submission: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review( + db_data_creator=ath.db_data_creator, + source=URLSource.MANUAL, + include_user_annotations=True + ) + get_response_3 = await ath.request_validator.get_next_url_for_all_annotations() + assert get_response_3.next_annotation is not None + assert get_response_3.next_annotation.url_info.url_id == setup_info_manual_submission.url_mapping.url_id diff --git a/tests/helpers/data_creator/commands/impl/urls_/query.py b/tests/helpers/data_creator/commands/impl/urls_/query.py index 1123af8e..c4fddad4 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/query.py +++ b/tests/helpers/data_creator/commands/impl/urls_/query.py @@ -19,7 +19,8 @@ def __init__( url_count: int, collector_metadata: dict | None = None, status: URLCreationEnum = URLCreationEnum.OK, - created_at: datetime | None = None + created_at: datetime | None = None, + source: URLSource = URLSource.COLLECTOR ): super().__init__() self.batch_id = batch_id @@ -27,6 +28,7 @@ def __init__( self.collector_metadata = collector_metadata self.status = status self.created_at = created_at + self.source = source async def run(self) -> InsertURLsInfo: raise NotImplementedError @@ -45,7 +47,7 @@ def run_sync(self) -> InsertURLsInfo: ) else None, collector_metadata=self.collector_metadata, created_at=self.created_at, - source=URLSource.COLLECTOR + source=self.source ) ) diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index dd08a178..93ece6e1 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -266,6 +266,7 @@ def urls( url_count: int, collector_metadata: dict | None = None, outcome: URLCreationEnum = URLCreationEnum.OK, + source: URLSource = URLSource.COLLECTOR, created_at: datetime | None = None ) -> InsertURLsInfo: command = URLsDBDataCreatorCommand( @@ -273,6 +274,7 @@ def urls( url_count=url_count, collector_metadata=collector_metadata, status=outcome, + source=source, created_at=created_at ) return self.run_command_sync(command) diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py index a3a3d42c..9acd733c 100644 --- a/tests/helpers/setup/final_review/core.py +++ b/tests/helpers/setup/final_review/core.py @@ -3,6 +3,7 @@ from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.url.core.enums import URLSource from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.setup.final_review.model import FinalReviewSetupInfo @@ -11,7 +12,8 @@ async def setup_for_get_next_url_for_final_review( db_data_creator: DBDataCreator, annotation_count: int | None = None, include_user_annotations: bool = True, - include_miscellaneous_metadata: bool = True + include_miscellaneous_metadata: bool = True, + source: URLSource = URLSource.COLLECTOR ) -> FinalReviewSetupInfo: """ Sets up the database to test the final_review functions @@ -22,7 +24,8 @@ async def setup_for_get_next_url_for_final_review( batch_id = db_data_creator.batch() url_mapping = db_data_creator.urls( batch_id=batch_id, - url_count=1 + url_count=1, + source=source ).url_mappings[0] if include_miscellaneous_metadata: await db_data_creator.url_miscellaneous_metadata(url_id=url_mapping.url_id) From d53e1c00a74c942d916b8d813a4c75852b9c74d2 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 18 Dec 2025 09:42:53 -0500 Subject: [PATCH 2/2] Remove unused test setup parameter --- tests/automated/integration/db/client/approve_url/test_basic.py | 1 - tests/automated/integration/db/client/approve_url/test_error.py | 1 - tests/helpers/setup/final_review/core.py | 1 - 3 files changed, 3 deletions(-) diff --git a/tests/automated/integration/db/client/approve_url/test_basic.py b/tests/automated/integration/db/client/approve_url/test_basic.py index f090a4ea..9421c1b7 100644 --- a/tests/automated/integration/db/client/approve_url/test_basic.py +++ b/tests/automated/integration/db/client/approve_url/test_basic.py @@ -17,7 +17,6 @@ async def test_approve_url_basic(db_data_creator: DBDataCreator): setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=db_data_creator, - annotation_count=3, include_user_annotations=True ) url_mapping = setup_info.url_mapping diff --git a/tests/automated/integration/db/client/approve_url/test_error.py b/tests/automated/integration/db/client/approve_url/test_error.py index 352e737a..f358a74b 100644 --- a/tests/automated/integration/db/client/approve_url/test_error.py +++ b/tests/automated/integration/db/client/approve_url/test_error.py @@ -11,7 +11,6 @@ async def test_approval_url_error(db_data_creator: DBDataCreator): setup_info = await setup_for_get_next_url_for_final_review( db_data_creator=db_data_creator, - annotation_count=3, include_user_annotations=True, include_miscellaneous_metadata=False ) diff --git a/tests/helpers/setup/final_review/core.py b/tests/helpers/setup/final_review/core.py index 9acd733c..c474fe2c 100644 --- a/tests/helpers/setup/final_review/core.py +++ b/tests/helpers/setup/final_review/core.py @@ -10,7 +10,6 @@ async def setup_for_get_next_url_for_final_review( db_data_creator: DBDataCreator, - annotation_count: int | None = None, include_user_annotations: bool = True, include_miscellaneous_metadata: bool = True, source: URLSource = URLSource.COLLECTOR