Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Rename suggestion tables to consistent nomenclature

Revision ID: 9292faed37fd
Revises: dfb64594049f
Create Date: 2025-12-18 09:51:20.074946

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_12_18_0951-9292faed37fd_rename_suggestion_tables_to_consistent_.py:11:1: F401 'sqlalchemy as sa' imported but unused


# revision identifiers, used by Alembic.
revision: str = '9292faed37fd'
down_revision: Union[str, None] = 'dfb64594049f'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Maps each existing suggestion table name (key) to the name it is given
# by this revision (value). Entries are grouped by the section comments below.
OLD_NEW_TABLE_MAPPING: dict[str, str] = {
    # --- Anonymous suggestions ---
    "anonymous_annotation_agency": "annotation__agency__anon",
    "anonymous_annotation_location": "annotation__location__anon",
    "anonymous_annotation_record_type": "annotation__record_type__anon",
    "anonymous_annotation_url_type": "annotation__url_type__anon",

    # --- User suggestions ---
    "user_url_agency_suggestions": "annotation__agency__user",
    "user_location_suggestions": "annotation__location__user",
    "user_record_type_suggestions": "annotation__record_type__user",
    "user_url_type_suggestions": "annotation__url_type__user",

    # --- Auto suggestions ---
    "auto_location_id_subtasks": "annotation__location__auto__subtasks",
    "location_id_subtask_suggestions": "annotation__location__auto__suggestions",
    "url_auto_agency_id_subtasks": "annotation__agency__auto__subtasks",
    "agency_id_subtask_suggestions": "annotation__agency__auto__suggestions",
    "auto_record_type_suggestions": "annotation__record_type__auto",
    "auto_relevant_suggestions": "annotation__url_type__auto",

    # --- Name suggestions ---
    "url_name_suggestions": "annotation__name__suggestions",
    "link__anonymous_sessions__name_suggestions": "annotation__name__anon__endorsements",
    "link_user_name_suggestions": "annotation__name__user__endorsements",
}

def upgrade() -> None:
    """Rename every suggestion table to its new name.

    Iterates ``OLD_NEW_TABLE_MAPPING`` in definition order and issues one
    ``op.rename_table`` call per entry, from the key (old name) to the
    value (new name).
    """
    for current_name, target_name in OLD_NEW_TABLE_MAPPING.items():
        op.rename_table(current_name, target_name)


def downgrade() -> None:
    """Restore the original table names, undoing ``upgrade``.

    Each entry of ``OLD_NEW_TABLE_MAPPING`` is applied in the opposite
    direction: the value (new name) is renamed back to the key (old name).
    Without this, a downgrade would move the Alembic revision pointer back
    while the tables kept their new names, leaving the schema and the
    revision history out of sync.
    """
    # Undo the renames in the opposite order from which upgrade() applied them.
    for old_table_name, new_table_name in reversed(
        list(OLD_NEW_TABLE_MAPPING.items())
    ):
        op.rename_table(
            old_table_name=new_table_name,
            new_table_name=old_table_name,
        )
4 changes: 2 additions & 2 deletions src/api/endpoints/annotate/_shared/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from src.api.endpoints.annotate.all.get.queries.name.core import GetNameSuggestionsQueryBuilder
from src.db.dto_converter import DTOConverter
from src.db.dtos.url.mapping_.simple import SimpleURLMapping
from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion


async def extract_and_format_get_annotation_result(
Expand Down Expand Up @@ -55,7 +55,7 @@ async def extract_and_format_get_annotation_result(
batch_info=await GetAnnotationBatchInfoQueryBuilder(
batch_id=batch_id,
models=[
UserURLAgencySuggestion,
AnnotationAgencyUser,
]
).run(session),
location_suggestions=location_suggestions,
Expand Down
32 changes: 16 additions & 16 deletions src/api/endpoints/annotate/all/get/queries/agency/requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from src.db.helpers.query import exists_url
from src.db.helpers.session import session_helper as sh
from src.db.models.impl.agency.sqlalchemy import Agency
from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask
from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion
from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser
from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound
from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask
from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion
from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion
from src.db.templates.requester import RequesterBase


Expand All @@ -36,10 +36,10 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]:
.where(
or_(
exists_url(
UserURLAgencySuggestion
AnnotationAgencyUser
),
exists_url(
URLAutoAgencyIDSubtask
AnnotationAgencyAutoSubtask
)
)
)
Expand All @@ -49,34 +49,34 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]:
# Number of users who suggested each agency
user_suggestions_cte = (
select(
UserURLAgencySuggestion.url_id,
UserURLAgencySuggestion.agency_id,
func.count(UserURLAgencySuggestion.user_id).label('user_count')
AnnotationAgencyUser.url_id,
AnnotationAgencyUser.agency_id,
func.count(AnnotationAgencyUser.user_id).label('user_count')
)
.group_by(
UserURLAgencySuggestion.agency_id,
UserURLAgencySuggestion.url_id,
AnnotationAgencyUser.agency_id,
AnnotationAgencyUser.url_id,
)
.cte("user_suggestions")
)

# Maximum confidence of robo annotation, if any
robo_suggestions_cte = (
select(
URLAutoAgencyIDSubtask.url_id,
AnnotationAgencyAutoSubtask.url_id,
Agency.id.label("agency_id"),
func.max(AgencyIDSubtaskSuggestion.confidence).label('robo_confidence')
func.max(AnnotationAgencyAutoSuggestion.confidence).label('robo_confidence')
)
.join(
AgencyIDSubtaskSuggestion,
AgencyIDSubtaskSuggestion.subtask_id == URLAutoAgencyIDSubtask.id
AnnotationAgencyAutoSuggestion,
AnnotationAgencyAutoSuggestion.subtask_id == AnnotationAgencyAutoSubtask.id
)
.join(
Agency,
Agency.id == AgencyIDSubtaskSuggestion.agency_id
Agency.id == AnnotationAgencyAutoSuggestion.agency_id
)
.group_by(
URLAutoAgencyIDSubtask.url_id,
AnnotationAgencyAutoSubtask.url_id,
Agency.id
)
.cte("robo_suggestions")
Expand Down
8 changes: 4 additions & 4 deletions src/api/endpoints/annotate/all/get/queries/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion
from src.core.enums import RecordType
from src.db.models.impl.flag.url_validated.enums import URLType
from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion
from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion
from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType
from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType


def convert_user_url_type_suggestion_to_url_type_annotation_suggestion(
db_suggestions: list[UserURLTypeSuggestion]
db_suggestions: list[AnnotationUserURLType]
) -> list[URLTypeAnnotationSuggestion]:
counter: Counter[URLType] = Counter()
for suggestion in db_suggestions:
Expand All @@ -26,7 +26,7 @@ def convert_user_url_type_suggestion_to_url_type_annotation_suggestion(
return anno_suggestions

def convert_user_record_type_suggestion_to_record_type_annotation_suggestion(
db_suggestions: list[UserRecordTypeSuggestion]
db_suggestions: list[AnnotationUserRecordType]
) -> RecordTypeAnnotationResponseOuterInfo:
counter: Counter[RecordType] = Counter()
for suggestion in db_suggestions:
Expand Down
35 changes: 19 additions & 16 deletions src/api/endpoints/annotate/all/get/queries/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
from src.api.endpoints.annotate._shared.queries import helper
from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse
from src.collectors.enums import URLStatus
from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser
from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser
from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended
from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion
from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion
from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion
from src.db.models.impl.url.suggestion.url_type.user import UserURLTypeSuggestion
from src.db.models.impl.annotation.record_type.user.user import AnnotationUserRecordType
from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationUserURLType
from src.db.models.views.unvalidated_url import UnvalidatedURL

Check warning on line 15 in src/api/endpoints/annotate/all/get/queries/core.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/all/get/queries/core.py#L15 <401>

'src.db.models.views.unvalidated_url.UnvalidatedURL' imported but unused
Raw output
./src/api/endpoints/annotate/all/get/queries/core.py:15:1: F401 'src.db.models.views.unvalidated_url.UnvalidatedURL' imported but unused
from src.db.models.views.url_anno_count import URLAnnotationCount

Check warning on line 16 in src/api/endpoints/annotate/all/get/queries/core.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/all/get/queries/core.py#L16 <401>

'src.db.models.views.url_anno_count.URLAnnotationCount' imported but unused
Raw output
./src/api/endpoints/annotate/all/get/queries/core.py:16:1: F401 'src.db.models.views.url_anno_count.URLAnnotationCount' imported but unused
from src.db.models.views.url_annotations_flags import URLAnnotationFlagsView

Check warning on line 17 in src/api/endpoints/annotate/all/get/queries/core.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/all/get/queries/core.py#L17 <401>

'src.db.models.views.url_annotations_flags.URLAnnotationFlagsView' imported but unused
Raw output
./src/api/endpoints/annotate/all/get/queries/core.py:17:1: F401 'src.db.models.views.url_annotations_flags.URLAnnotationFlagsView' imported but unused
from src.db.queries.base.builder import QueryBuilderBase


Expand Down Expand Up @@ -45,35 +48,35 @@
URL.status == URLStatus.OK.value,
# Must not have been previously annotated by user
~exists(
select(UserURLTypeSuggestion.url_id)
select(AnnotationUserURLType.url_id)
.where(
UserURLTypeSuggestion.url_id == URL.id,
UserURLTypeSuggestion.user_id == self.user_id,
AnnotationUserURLType.url_id == URL.id,
AnnotationUserURLType.user_id == self.user_id,
)
),
~exists(
select(UserURLAgencySuggestion.url_id)
select(AnnotationAgencyUser.url_id)
.where(
UserURLAgencySuggestion.url_id == URL.id,
UserURLAgencySuggestion.user_id == self.user_id,
AnnotationAgencyUser.url_id == URL.id,
AnnotationAgencyUser.user_id == self.user_id,
)
),
~exists(
select(
UserLocationSuggestion.url_id
AnnotationLocationUser.url_id
)
.where(
UserLocationSuggestion.url_id == URL.id,
UserLocationSuggestion.user_id == self.user_id,
AnnotationLocationUser.url_id == URL.id,
AnnotationLocationUser.user_id == self.user_id,
)
),
~exists(
select(
UserRecordTypeSuggestion.url_id
AnnotationUserRecordType.url_id
)
.where(
UserRecordTypeSuggestion.url_id == URL.id,
UserRecordTypeSuggestion.user_id == self.user_id,
AnnotationUserRecordType.url_id == URL.id,
AnnotationUserRecordType.user_id == self.user_id,
)
),
~exists(
Expand Down
32 changes: 16 additions & 16 deletions src/api/endpoints/annotate/all/get/queries/location_/requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from src.api.endpoints.annotate.all.get.queries._shared.sort import sort_suggestions
from src.db.helpers.query import exists_url
from src.db.helpers.session import session_helper as sh
from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask
from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion
from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser
from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound
from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask
from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion
from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion
from src.db.models.views.location_expanded import LocationExpandedView
from src.db.templates.requester import RequesterBase

Expand All @@ -25,10 +25,10 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]:
.where(
or_(
exists_url(
UserLocationSuggestion
AnnotationLocationUser
),
exists_url(
AutoLocationIDSubtask
AnnotationLocationAutoSubtask
)
)
)
Expand All @@ -37,34 +37,34 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]:
# Number of users who suggested each location
user_suggestions_cte = (
select(
UserLocationSuggestion.url_id,
UserLocationSuggestion.location_id,
func.count(UserLocationSuggestion.user_id).label('user_count')
AnnotationLocationUser.url_id,
AnnotationLocationUser.location_id,
func.count(AnnotationLocationUser.user_id).label('user_count')
)
.group_by(
UserLocationSuggestion.location_id,
UserLocationSuggestion.url_id,
AnnotationLocationUser.location_id,
AnnotationLocationUser.url_id,
)
.cte("user_suggestions")
)
# Maximum confidence of robo annotation, if any
robo_suggestions_cte = (
select(
AutoLocationIDSubtask.url_id,
AnnotationLocationAutoSubtask.url_id,
LocationExpandedView.id.label("location_id"),
func.max(LocationIDSubtaskSuggestion.confidence).label('robo_confidence')
func.max(AnnotationLocationAutoSuggestion.confidence).label('robo_confidence')
)
.join(
LocationExpandedView,
LocationExpandedView.id == LocationIDSubtaskSuggestion.location_id
LocationExpandedView.id == AnnotationLocationAutoSuggestion.location_id
)
.join(
AutoLocationIDSubtask,
AutoLocationIDSubtask.id == LocationIDSubtaskSuggestion.subtask_id
AnnotationLocationAutoSubtask,
AnnotationLocationAutoSubtask.id == AnnotationLocationAutoSuggestion.subtask_id
)
.group_by(
LocationExpandedView.id,
AutoLocationIDSubtask.url_id,
AnnotationLocationAutoSubtask.url_id,
)
.cte("robo_suggestions")
)
Expand Down
22 changes: 11 additions & 11 deletions src/api/endpoints/annotate/all/get/queries/name/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion, NameAnnotationResponseOuterInfo
from src.db.helpers.session import session_helper as sh
from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion
from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource
from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion
from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource
from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion
from src.db.models.impl.annotation.name.user.sqlalchemy import LinkUserNameSuggestion
from src.db.queries.base.builder import QueryBuilderBase


Expand All @@ -23,30 +23,30 @@ def __init__(
async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo:
query = (
select(
URLNameSuggestion.id.label('id'),
URLNameSuggestion.suggestion.label('display_name'),
AnnotationNameSuggestion.id.label('id'),
AnnotationNameSuggestion.suggestion.label('display_name'),
func.count(
LinkUserNameSuggestion.user_id
).label('user_count'),
case(
(URLNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1),
(AnnotationNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1),
else_=0
).label("robo_count")
)
.outerjoin(
LinkUserNameSuggestion,
LinkUserNameSuggestion.suggestion_id == URLNameSuggestion.id,
LinkUserNameSuggestion.suggestion_id == AnnotationNameSuggestion.id,
)
.where(
URLNameSuggestion.url_id == self.url_id,
AnnotationNameSuggestion.url_id == self.url_id,
)
.group_by(
URLNameSuggestion.id,
URLNameSuggestion.suggestion,
AnnotationNameSuggestion.id,
AnnotationNameSuggestion.suggestion,
)
.order_by(
func.count(LinkUserNameSuggestion.user_id).desc(),
URLNameSuggestion.id.asc(),
AnnotationNameSuggestion.id.asc(),
)
.limit(3)
)
Expand Down
Loading