Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions src/api/endpoints/annotate/_shared/queries/helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
This module contains helper functions for the annotate GET queries
"""

from sqlalchemy import Select, case
from sqlalchemy.orm import joinedload

from src.db.models.impl.url.core.enums import URLSource
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.views.unvalidated_url import UnvalidatedURL
from src.db.models.views.url_anno_count import URLAnnotationCount
from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView


def get_select() -> Select:
    """
    Build the base SELECT statement shared by the annotate GET queries.

    The statement selects ``URL`` and joins it, on the URL id, to the
    ``UnvalidatedURL``, ``URLAnnotationFlagsView`` and
    ``URLAnnotationCount`` models. No filtering, load options, ordering
    or limit are applied here; callers add those themselves.

    :return: The joined, otherwise unfiltered ``Select`` on ``URL``.
    """
    joined_models = (
        # URL Must be unvalidated
        UnvalidatedURL,
        URLAnnotationFlagsView,
        URLAnnotationCount,
    )
    base_query = Select(URL)
    # Every joined model is keyed by the id of the URL it describes
    for model in joined_models:
        base_query = base_query.join(model, model.url_id == URL.id)
    return base_query

def conclude(query: Select) -> Select:
    """
    Finish an annotate GET query with load options, ordering and a limit.

    Ordering priority:
      1. URLs whose source is ``URLSource.MANUAL`` come first.
      2. Ties are broken by higher ``total_anno_count``.
      3. Remaining ties are broken by lower ``URL.id``.

    :param query: A ``Select`` on ``URL`` that is already joined to
        ``URLAnnotationCount`` (the ordering references its column).
    :return: The query with load options, ordering and ``LIMIT 1`` added.
    """
    # Relationships to load together with the URL
    load_options = (
        joinedload(URL.html_content),
        joinedload(URL.user_relevant_suggestions),
        joinedload(URL.user_record_type_suggestions),
        joinedload(URL.name_suggestions),
    )

    # Privilege manually submitted URLs first (0 sorts before 1)
    manual_first = case(
        (URL.source == URLSource.MANUAL, 0),
        else_=1
    ).asc()
    # Break ties by favoring URL with higher total annotations
    most_annotated_first = URLAnnotationCount.total_anno_count.desc()
    # Break additional ties by favoring least recently created URLs
    oldest_first = URL.id.asc()

    with_loads = query.options(*load_options)
    ordered = with_loads.order_by(
        manual_first,
        most_annotated_first,
        oldest_first,
    )
    # Limit to 1 result
    return ordered.limit(1)

Check warning on line 57 in src/api/endpoints/annotate/_shared/queries/helper.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/_shared/queries/helper.py#L57 <292>

no newline at end of file
Raw output
./src/api/endpoints/annotate/_shared/queries/helper.py:57:17: W292 no newline at end of file
41 changes: 9 additions & 32 deletions src/api/endpoints/annotate/all/get/queries/core.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from sqlalchemy import Select, exists, select
from sqlalchemy import exists, select

Check warning on line 1 in src/api/endpoints/annotate/all/get/queries/core.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/all/get/queries/core.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/annotate/all/get/queries/core.py:1:1: D100 Missing docstring in public module
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import joinedload

from src.api.endpoints.annotate._shared.extract import extract_and_format_get_annotation_result
from src.api.endpoints.annotate._shared.queries import helper
from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse
from src.collectors.enums import URLStatus
from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended
Expand All @@ -12,9 +12,6 @@
from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion
from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion
from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion
from src.db.models.views.unvalidated_url import UnvalidatedURL
from src.db.models.views.url_anno_count import URLAnnotationCount
from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView
from src.db.queries.base.builder import QueryBuilderBase


Expand All @@ -35,22 +32,9 @@
self,
session: AsyncSession
) -> GetNextURLForAllAnnotationResponse:
query = (
Select(URL)
# URL Must be unvalidated
.join(
UnvalidatedURL,
UnvalidatedURL.url_id == URL.id
)
.join(
URLAnnotationFlagsView,
URLAnnotationFlagsView.url_id == URL.id
)
.join(
URLAnnotationCount,
URLAnnotationCount.url_id == URL.id
)
)
query = helper.get_select()

# Add user annotation-specific joins and conditions
if self.batch_id is not None:
query = query.join(LinkBatchURL).where(LinkBatchURL.batch_id == self.batch_id)
if self.url_id is not None:
Expand Down Expand Up @@ -102,18 +86,11 @@
)
)
)
# Add load options
query = query.options(
joinedload(URL.html_content),
joinedload(URL.user_relevant_suggestions),
joinedload(URL.user_record_type_suggestions),
joinedload(URL.name_suggestions),
)

query = query.order_by(
URLAnnotationCount.total_anno_count.desc(),
URL.id.asc()
).limit(1)

# Conclude query with limit and sorting

Check failure on line 91 in src/api/endpoints/annotate/all/get/queries/core.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/all/get/queries/core.py#L91 <303>

too many blank lines (2)
Raw output
./src/api/endpoints/annotate/all/get/queries/core.py:91:9: E303 too many blank lines (2)
query = helper.conclude(query)

raw_results = (await session.execute(query)).unique()
url: URL | None = raw_results.scalars().one_or_none()
if url is None:
Expand Down
30 changes: 5 additions & 25 deletions src/api/endpoints/annotate/anonymous/get/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from src.db.models.views.url_anno_count import URLAnnotationCount
from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView
from src.db.queries.base.builder import QueryBuilderBase
from src.api.endpoints.annotate._shared.queries import helper


class GetNextURLForAnonymousAnnotationQueryBuilder(QueryBuilderBase):
Expand All @@ -33,22 +34,11 @@ def __init__(
self.session_id = session_id

async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationResponse:
query = helper.get_select()

# Add anonymous annotation-specific conditions.
query = (
Select(URL)
# URL Must be unvalidated
.join(
UnvalidatedURL,
UnvalidatedURL.url_id == URL.id
)
.join(
URLAnnotationFlagsView,
URLAnnotationFlagsView.url_id == URL.id
)
.join(
URLAnnotationCount,
URLAnnotationCount.url_id == URL.id
)
query
.where(
URL.status == URLStatus.OK.value,
# Must not have been previously annotated by user
Expand Down Expand Up @@ -77,18 +67,8 @@ async def run(self, session: AsyncSession) -> GetNextURLForAnonymousAnnotationRe
)
)
)
.options(
joinedload(URL.html_content),
joinedload(URL.user_relevant_suggestions),
joinedload(URL.user_record_type_suggestions),
joinedload(URL.name_suggestions),
)
.order_by(
URLAnnotationCount.total_anno_count.desc(),
URL.id.asc()
)
.limit(1)
)
query = helper.conclude(query)

raw_results = (await session.execute(query)).unique()
url: URL | None = raw_results.scalars().one_or_none()
Expand Down
2 changes: 1 addition & 1 deletion src/api/endpoints/contributions/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from src.api.endpoints.contributions.user.response import ContributionsUserResponse
from src.core.core import AsyncCore
from src.security.dtos.access_info import AccessInfo
from src.security.manager import get_access_info, get_standard_user_access_info
from src.security.manager import get_standard_user_access_info

contributions_router = APIRouter(
prefix="/contributions",
Expand Down
48 changes: 48 additions & 0 deletions tests/automated/integration/api/annotate/all/test_sorting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pytest

Check warning on line 1 in tests/automated/integration/api/annotate/all/test_sorting.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/automated/integration/api/annotate/all/test_sorting.py#L1 <100>

Missing docstring in public module
Raw output
./tests/automated/integration/api/annotate/all/test_sorting.py:1:1: D100 Missing docstring in public module

from src.db.models.impl.url.core.enums import URLSource
from tests.helpers.api_test_helper import APITestHelper
from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review
from tests.helpers.setup.final_review.model import FinalReviewSetupInfo


@pytest.mark.asyncio
async def test_annotate_sorting(
    api_test_helper: APITestHelper,
):
    """
    Check the priority order used when serving the next URL to annotate:
    - Manual submissions come first
    - Then URLs with more annotations
    - Then lower URL IDs (i.e. least recently created)
    """
    ath = api_test_helper

    async def fetch_next_url_id():
        """Request the next URL for annotation and return its URL id."""
        response = await ath.request_validator.get_next_url_for_all_annotations()
        assert response.next_annotation is not None
        return response.next_annotation.url_info.url_id

    # With nothing else to distinguish them, the first URL created wins
    first_created: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review(
        db_data_creator=ath.db_data_creator,
        include_user_annotations=False
    )
    assert await fetch_next_url_id() == first_created.url_mapping.url_id

    # ...a URL with more annotations outranks the least recently created one
    more_annotations: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review(
        db_data_creator=ath.db_data_creator,
        include_user_annotations=True
    )
    assert await fetch_next_url_id() == more_annotations.url_mapping.url_id

    # ...and a manual submission outranks the higher annotation count
    manual_submission: FinalReviewSetupInfo = await setup_for_get_next_url_for_final_review(
        db_data_creator=ath.db_data_creator,
        source=URLSource.MANUAL,
        include_user_annotations=True
    )
    assert await fetch_next_url_id() == manual_submission.url_mapping.url_id
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
async def test_approve_url_basic(db_data_creator: DBDataCreator):
setup_info = await setup_for_get_next_url_for_final_review(
db_data_creator=db_data_creator,
annotation_count=3,
include_user_annotations=True
)
url_mapping = setup_info.url_mapping
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
async def test_approval_url_error(db_data_creator: DBDataCreator):
setup_info = await setup_for_get_next_url_for_final_review(
db_data_creator=db_data_creator,
annotation_count=3,
include_user_annotations=True,
include_miscellaneous_metadata=False
)
Expand Down
6 changes: 4 additions & 2 deletions tests/helpers/data_creator/commands/impl/urls_/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@ def __init__(
url_count: int,
collector_metadata: dict | None = None,
status: URLCreationEnum = URLCreationEnum.OK,
created_at: datetime | None = None
created_at: datetime | None = None,
source: URLSource = URLSource.COLLECTOR
):
super().__init__()
self.batch_id = batch_id
self.url_count = url_count
self.collector_metadata = collector_metadata
self.status = status
self.created_at = created_at
self.source = source

async def run(self) -> InsertURLsInfo:
raise NotImplementedError
Expand All @@ -45,7 +47,7 @@ def run_sync(self) -> InsertURLsInfo:
) else None,
collector_metadata=self.collector_metadata,
created_at=self.created_at,
source=URLSource.COLLECTOR
source=self.source
)
)

Expand Down
2 changes: 2 additions & 0 deletions tests/helpers/data_creator/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,13 +266,15 @@ def urls(
url_count: int,
collector_metadata: dict | None = None,
outcome: URLCreationEnum = URLCreationEnum.OK,
source: URLSource = URLSource.COLLECTOR,
created_at: datetime | None = None
) -> InsertURLsInfo:
command = URLsDBDataCreatorCommand(
batch_id=batch_id,
url_count=url_count,
collector_metadata=collector_metadata,
status=outcome,
source=source,
created_at=created_at
)
return self.run_command_sync(command)
Expand Down
8 changes: 5 additions & 3 deletions tests/helpers/setup/final_review/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
from src.api.endpoints.annotate.agency.post.dto import URLAgencyAnnotationPostInfo
from src.core.enums import RecordType
from src.db.models.impl.flag.url_validated.enums import URLType
from src.db.models.impl.url.core.enums import URLSource
from tests.helpers.data_creator.core import DBDataCreator
from tests.helpers.setup.final_review.model import FinalReviewSetupInfo


async def setup_for_get_next_url_for_final_review(
db_data_creator: DBDataCreator,
annotation_count: int | None = None,
include_user_annotations: bool = True,
include_miscellaneous_metadata: bool = True
include_miscellaneous_metadata: bool = True,
source: URLSource = URLSource.COLLECTOR
) -> FinalReviewSetupInfo:
"""
Sets up the database to test the final_review functions
Expand All @@ -22,7 +23,8 @@ async def setup_for_get_next_url_for_final_review(
batch_id = db_data_creator.batch()
url_mapping = db_data_creator.urls(
batch_id=batch_id,
url_count=1
url_count=1,
source=source
).url_mappings[0]
if include_miscellaneous_metadata:
await db_data_creator.url_miscellaneous_metadata(url_id=url_mapping.url_id)
Expand Down