diff --git a/src/api/endpoints/annotate/relevance/get/dto.py b/src/api/endpoints/annotate/relevance/get/dto.py index 5d494555..b4467365 100644 --- a/src/api/endpoints/annotate/relevance/get/dto.py +++ b/src/api/endpoints/annotate/relevance/get/dto.py @@ -17,7 +17,7 @@ class RelevanceAnnotationResponseInfo(BaseModel): ) class GetNextRelevanceAnnotationResponseInfo(AnnotationInnerResponseInfoBase): - annotation: RelevanceAnnotationInfo | None = Field( + annotation: RelevanceAnnotationResponseInfo | None = Field( title="The auto-labeler's annotation for relevance" ) diff --git a/src/api/endpoints/annotate/relevance/get/query.py b/src/api/endpoints/annotate/relevance/get/query.py index d8cf72e0..ffd37d2c 100644 --- a/src/api/endpoints/annotate/relevance/get/query.py +++ b/src/api/endpoints/annotate/relevance/get/query.py @@ -3,7 +3,8 @@ from src.api.endpoints.annotate._shared.queries.get_annotation_batch_info import GetAnnotationBatchInfoQueryBuilder from src.api.endpoints.annotate._shared.queries.get_next_url_for_user_annotation import \ GetNextURLForUserAnnotationQueryBuilder -from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseInfo +from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseInfo, \ + RelevanceAnnotationResponseInfo from src.core.tasks.url.operators.auto_relevant.models.annotation import RelevanceAnnotationInfo from src.db.dto_converter import DTOConverter from src.db.dtos.url.mapping import URLMapping @@ -40,7 +41,7 @@ async def run( ) if url.auto_relevant_suggestion is not None: - suggestion = url.auto_relevant_suggestion.relevant + suggestion = url.auto_relevant_suggestion else: suggestion = None @@ -49,11 +50,11 @@ async def run( url=url.url, url_id=url.id ), - annotation=RelevanceAnnotationInfo( - is_relevant=suggestion.is_relevant, + annotation=RelevanceAnnotationResponseInfo( + is_relevant=suggestion.relevant, confidence=suggestion.confidence, model_name=suggestion.model_name - ), + ) if suggestion else None, html_info=html_response_info, batch_info=await GetAnnotationBatchInfoQueryBuilder( batch_id=self.batch_id, diff --git a/src/core/tasks/url/operators/auto_relevant/queries/get_tdos.py b/src/core/tasks/url/operators/auto_relevant/queries/get_tdos.py index 79acd077..b444b5b3 100644 --- a/src/core/tasks/url/operators/auto_relevant/queries/get_tdos.py +++ b/src/core/tasks/url/operators/auto_relevant/queries/get_tdos.py @@ -1,4 +1,4 @@ -from typing import Any, Sequence +from typing import Sequence from sqlalchemy import select, Row from sqlalchemy.ext.asyncio import AsyncSession @@ -27,13 +27,9 @@ async def run(self, session: AsyncSession) -> list[URLRelevantTDO]: .options( selectinload(URL.compressed_html) ) - .outerjoin( - URLCompressedHTML, - URL.id == URLCompressedHTML.url_id - ) + .join(URLCompressedHTML) .where( URL.outcome == URLStatus.PENDING.value, - URLCompressedHTML.compressed_html.is_not(None) ) ) query = StatementComposer.exclude_urls_with_extant_model( diff --git a/src/db/client/async_.py b/src/db/client/async_.py index cc47d221..00af074d 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -22,7 +22,9 @@ from src.api.endpoints.annotate.all.get.query import GetNextURLForAllAnnotationQueryBuilder from src.api.endpoints.annotate.all.post.dto import AllAnnotationPostInfo from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseInfo -from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseInfo +from src.api.endpoints.annotate.relevance.get.dto import GetNextRelevanceAnnotationResponseInfo, \ + RelevanceAnnotationResponseInfo +from src.api.endpoints.annotate.relevance.get.query import GetNextUrlForRelevanceAnnotationQueryBuilder from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO @@ -311,50 +313,12 @@ async def add_user_relevant_suggestion( ) session.add(suggestion) - @session_manager async def get_next_url_for_relevance_annotation( self, - session: AsyncSession, - user_id: int, - batch_id: Optional[int] + batch_id: int | None, + user_id: int | None = None, ) -> GetNextRelevanceAnnotationResponseInfo | None: - - url = await GetNextURLForUserAnnotationQueryBuilder( - user_suggestion_model_to_exclude=UserRelevantSuggestion, - auto_suggestion_relationship=URL.auto_relevant_suggestion, - batch_id=batch_id - ).run(session) - if url is None: - return None - - # Next, get all HTML content for the URL - html_response_info = DTOConverter.html_content_list_to_html_response_info( - url.html_content - ) - - if url.auto_relevant_suggestion is not None: - suggestion = url.auto_relevant_suggestion - else: - suggestion = None - - return GetNextRelevanceAnnotationResponseInfo( - url_info=URLMapping( - url=url.url, - url_id=url.id - ), - annotation=RelevanceAnnotationInfo( - is_relevant=suggestion.relevant, - confidence=suggestion.confidence, - model_name=suggestion.model_name - ) if suggestion is not None else None, - html_info=html_response_info, - batch_info=await GetAnnotationBatchInfoQueryBuilder( - batch_id=batch_id, - models=[ - UserUrlAgencySuggestion, - ] - ).run(session) - ) + return await self.run_query_builder(GetNextUrlForRelevanceAnnotationQueryBuilder(batch_id)) # endregion relevant @@ -609,7 +573,7 @@ async def has_urls_with_html_data_and_without_models( model: Type[Base] ) -> bool: statement = (select(URL) - .join(URLHTMLContent) + .join(URLCompressedHTML) .where(URL.outcome == URLStatus.PENDING.value)) # Exclude URLs with auto suggested record types statement = self.statement_composer.exclude_urls_with_extant_model( diff --git a/tests/automated/integration/tasks/asserts.py b/tests/automated/integration/tasks/asserts.py index 17f7cba5..6e4e0d7e 100644 --- a/tests/automated/integration/tasks/asserts.py +++ b/tests/automated/integration/tasks/asserts.py @@ -5,6 +5,10 @@ async def assert_prereqs_not_met(operator): meets_prereqs = await operator.meets_task_prerequisites() assert not meets_prereqs +async def assert_prereqs_met(operator): + meets_prereqs = await operator.meets_task_prerequisites() + assert meets_prereqs + def assert_task_has_expected_run_info(run_info, url_ids: list[int]): assert run_info.outcome == TaskOperatorOutcome.SUCCESS diff --git a/tests/automated/integration/tasks/url/auto_relevant/test_task.py b/tests/automated/integration/tasks/url/auto_relevant/test_task.py index 0c39cb9a..287b5f13 100644 --- a/tests/automated/integration/tasks/url/auto_relevant/test_task.py +++ b/tests/automated/integration/tasks/url/auto_relevant/test_task.py @@ -4,7 +4,8 @@ from src.db.models.instantiations.url.core import URL from src.db.models.instantiations.url.error_info import URLErrorInfo from src.db.models.instantiations.url.suggestion.relevant.auto import AutoRelevantSuggestion -from tests.automated.integration.tasks.asserts import assert_prereqs_not_met, assert_task_has_expected_run_info +from tests.automated.integration.tasks.asserts import assert_prereqs_not_met, assert_task_has_expected_run_info, \ + assert_prereqs_met from tests.automated.integration.tasks.url.auto_relevant.setup import setup_operator, setup_urls @@ -15,6 +16,7 @@ async def test_url_auto_relevant_task(db_data_creator): await assert_prereqs_not_met(operator) url_ids = await setup_urls(db_data_creator) + await assert_prereqs_met(operator) task_id = await db_data_creator.adb_client.initiate_task(task_type=TaskType.RELEVANCY)