Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/api/endpoints/annotate/_shared/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,25 @@ async def extract_and_format_get_annotation_result(
html_response_info = DTOConverter.html_content_list_to_html_response_info(
url.html_content
)
# URL Types
url_type_suggestions: list[URLTypeAnnotationSuggestion] = \
convert_user_url_type_suggestion_to_url_type_annotation_suggestion(
url.user_relevant_suggestions
url.user_url_type_suggestions,
url.anon_url_type_suggestions
)
# Record Types
record_type_suggestions: RecordTypeAnnotationResponseOuterInfo = \
convert_user_record_type_suggestion_to_record_type_annotation_suggestion(
url.user_record_type_suggestions
url.user_record_type_suggestions,
url.anon_record_type_suggestions
)
# Agencies
agency_suggestions: AgencyAnnotationResponseOuterInfo = \
await GetAgencySuggestionsQueryBuilder(url_id=url.id).run(session)
# Locations
location_suggestions: LocationAnnotationResponseOuterInfo = \
await GetLocationSuggestionsQueryBuilder(url_id=url.id).run(session)
# Names
name_suggestions: NameAnnotationResponseOuterInfo = \
await GetNameSuggestionsQueryBuilder(url_id=url.id).run(session)
return GetNextURLForAllAnnotationResponse(
Expand Down
5 changes: 3 additions & 2 deletions src/api/endpoints/annotate/_shared/queries/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ def conclude(query: Select) -> Select:
# Add load options
query.options(
joinedload(URL.html_content),
joinedload(URL.user_relevant_suggestions),
joinedload(URL.user_url_type_suggestions),
joinedload(URL.user_record_type_suggestions),
joinedload(URL.name_suggestions),
joinedload(URL.anon_record_type_suggestions),
joinedload(URL.anon_url_type_suggestions),
)
# Sorting Priority
.order_by(
Expand Down
34 changes: 32 additions & 2 deletions src/api/endpoints/annotate/all/get/queries/agency/requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from src.db.helpers.query import exists_url
from src.db.helpers.session import session_helper as sh
from src.db.models.impl.agency.sqlalchemy import Agency
from src.db.models.impl.annotation.agency.anon.sqlalchemy import AnnotationAgencyAnon
from src.db.models.impl.annotation.agency.auto.subtask.sqlalchemy import AnnotationAgencyAutoSubtask
from src.db.models.impl.annotation.agency.auto.suggestion.sqlalchemy import AnnotationAgencyAutoSuggestion
from src.db.models.impl.annotation.agency.user.sqlalchemy import AnnotationAgencyUser
Expand Down Expand Up @@ -40,6 +41,9 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]:
),
exists_url(
AnnotationAgencyAutoSubtask
),
exists_url(
AnnotationAgencyAnon
)
)
)
Expand All @@ -60,6 +64,20 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]:
.cte("user_suggestions")
)

# Number of anon users who suggested each agency
anon_suggestions_cte = (
select(
AnnotationAgencyAnon.url_id,
AnnotationAgencyAnon.agency_id,
func.count(AnnotationAgencyAnon.session_id).label('anon_count')
)
.group_by(
AnnotationAgencyAnon.agency_id,
AnnotationAgencyAnon.url_id,
)
.cte("anon_suggestions")
)

# Maximum confidence of robo annotation, if any
robo_suggestions_cte = (
select(
Expand Down Expand Up @@ -88,6 +106,7 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]:
Agency.name.label("display_name"),
func.coalesce(user_suggestions_cte.c.user_count, 0).label('user_count'),
func.coalesce(robo_suggestions_cte.c.robo_confidence, 0).label('robo_confidence'),
func.coalesce(anon_suggestions_cte.c.anon_count, 0).label('anon_count'),
)
.join(
Agency,
Expand All @@ -100,6 +119,13 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]:
user_suggestions_cte.c.agency_id == Agency.id
)
)
.outerjoin(
anon_suggestions_cte,
and_(
anon_suggestions_cte.c.url_id == self.url_id,
anon_suggestions_cte.c.agency_id == Agency.id
)
)
.outerjoin(
robo_suggestions_cte,
and_(
Expand All @@ -110,7 +136,8 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]:
.where(
or_(
user_suggestions_cte.c.user_count > 0,
robo_suggestions_cte.c.robo_confidence > 0
robo_suggestions_cte.c.robo_confidence > 0,
anon_suggestions_cte.c.anon_count > 0
)
)
)
Expand All @@ -119,7 +146,10 @@ async def get_agency_suggestions(self) -> list[SuggestionModel]:
mappings: Sequence[RowMapping] = await self.mappings(joined_suggestions_query)
suggestions: list[SuggestionModel] = [
SuggestionModel(
**mapping
id=mapping["id"],
display_name=mapping["display_name"],
user_count=mapping['user_count'] + (mapping['anon_count'] // 2),
robo_confidence=mapping["robo_confidence"]
)
for mapping in mappings
]
Expand Down
24 changes: 18 additions & 6 deletions src/api/endpoints/annotate/all/get/queries/convert.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,55 @@
import math

Check warning on line 1 in src/api/endpoints/annotate/all/get/queries/convert.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/all/get/queries/convert.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/annotate/all/get/queries/convert.py:1:1: D100 Missing docstring in public module
from collections import Counter

from src.api.endpoints.annotate.all.get.models.record_type import RecordTypeAnnotationResponseOuterInfo, \
RecordTypeSuggestionModel
from src.api.endpoints.annotate.all.get.models.url_type import URLTypeAnnotationSuggestion
from src.core.enums import RecordType
from src.db.models.impl.annotation.record_type.anon.sqlalchemy import AnnotationRecordTypeAnon
from src.db.models.impl.annotation.url_type.anon.sqlalchemy import AnnotationURLTypeAnon
from src.db.models.impl.flag.url_validated.enums import URLType
from src.db.models.impl.annotation.record_type.user.user import AnnotationRecordTypeUser
from src.db.models.impl.annotation.url_type.user.sqlalchemy import AnnotationURLTypeUser


def convert_user_url_type_suggestion_to_url_type_annotation_suggestion(
    user_suggestions: list[AnnotationURLTypeUser],
    anon_suggestions: list[AnnotationURLTypeAnon]
) -> list[URLTypeAnnotationSuggestion]:
    """Tally URL type suggestions and return the three most endorsed.

    Each suggestion from a logged-in user adds 1 to the tally for its URL
    type; each anonymous suggestion adds 0.5.  The three URL types with the
    highest weighted tally are returned, in descending tally order.

    Args:
        user_suggestions: URL type annotations made by users; the URL type
            is read from each item's ``type`` attribute.
        anon_suggestions: URL type annotations made anonymously; the URL
            type is read from each item's ``url_type`` attribute.

    Returns:
        At most three ``URLTypeAnnotationSuggestion`` objects whose
        ``endorsement_count`` is the weighted tally rounded down to an
        integer.
    """
    # The tally holds fractional values because of the 0.5 anonymous weight,
    # even though the Counter is keyed (and annotated) by URLType only.
    counter: Counter[URLType] = Counter()
    for suggestion in user_suggestions:
        counter[suggestion.type] += 1

    for suggestion in anon_suggestions:
        counter[suggestion.url_type] += 0.5

    anno_suggestions: list[URLTypeAnnotationSuggestion] = []
    # Ranking uses the unrounded tally; rounding happens only on output, so a
    # URL type backed by a single anonymous suggestion is reported with an
    # endorsement_count of 0.
    for url_type, endorsement_count in counter.most_common(3):
        anno_suggestions.append(
            URLTypeAnnotationSuggestion(
                url_type=url_type,
                endorsement_count=math.floor(endorsement_count),
            )
        )
    return anno_suggestions

def convert_user_record_type_suggestion_to_record_type_annotation_suggestion(
db_suggestions: list[AnnotationRecordTypeUser]
user_suggestions: list[AnnotationRecordTypeUser],
anon_suggestions: list[AnnotationRecordTypeAnon]
) -> RecordTypeAnnotationResponseOuterInfo:
counter: Counter[RecordType] = Counter()
for suggestion in db_suggestions:
for suggestion in user_suggestions:
counter[suggestion.record_type] += 1

for suggestion in anon_suggestions:
counter[suggestion.record_type] += 0.5

suggestions: list[RecordTypeSuggestionModel] = []
for record_type, endorsement_count in counter.most_common(3):
suggestions.append(
RecordTypeSuggestionModel(
record_type=record_type,
user_count=endorsement_count,
user_count=math.floor(endorsement_count),
robo_confidence=0,
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def __init__(
super().__init__()
self.url_id = url_id

# TODO: Test
async def run(self, session: AsyncSession) -> LocationAnnotationResponseOuterInfo:
requester = GetLocationSuggestionsRequester(session)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from src.api.endpoints.annotate.all.get.queries._shared.sort import sort_suggestions
from src.db.helpers.query import exists_url
from src.db.helpers.session import session_helper as sh
from src.db.models.impl.annotation.location.anon.sqlalchemy import AnnotationLocationAnon
from src.db.models.impl.annotation.location.auto.subtask.sqlalchemy import AnnotationLocationAutoSubtask
from src.db.models.impl.annotation.location.auto.suggestion.sqlalchemy import AnnotationLocationAutoSuggestion
from src.db.models.impl.annotation.location.user.sqlalchemy import AnnotationLocationUser
Expand All @@ -29,6 +30,9 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]:
),
exists_url(
AnnotationLocationAutoSubtask
),
exists_url(
AnnotationLocationAnon
)
)
)
Expand All @@ -47,6 +51,20 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]:
)
.cte("user_suggestions")
)
# Number of anon users who suggested each location
anon_suggestions_cte = (
select(
AnnotationLocationAnon.url_id,
AnnotationLocationAnon.location_id,
func.count(AnnotationLocationAnon.session_id).label('anon_count')
)
.group_by(
AnnotationLocationAnon.location_id,
AnnotationLocationAnon.url_id,
)
.cte("anon_suggestions")
)

# Maximum confidence of robo annotation, if any
robo_suggestions_cte = (
select(
Expand Down Expand Up @@ -75,6 +93,7 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]:
LocationExpandedView.full_display_name.label("display_name"),
func.coalesce(user_suggestions_cte.c.user_count, 0).label("user_count"),
func.coalesce(robo_suggestions_cte.c.robo_confidence, 0).label("robo_confidence"),
func.coalesce(anon_suggestions_cte.c.anon_count, 0).label("anon_count"),
)
.join(
LocationExpandedView,
Expand All @@ -87,6 +106,13 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]:
user_suggestions_cte.c.location_id == LocationExpandedView.id
)
)
.outerjoin(
anon_suggestions_cte,
and_(
anon_suggestions_cte.c.url_id == url_id,
anon_suggestions_cte.c.location_id == LocationExpandedView.id
)
)
.outerjoin(
robo_suggestions_cte,
and_(
Expand All @@ -97,15 +123,19 @@ async def get_location_suggestions(self, url_id: int) -> list[SuggestionModel]:
.where(
or_(
user_suggestions_cte.c.user_count > 0,
robo_suggestions_cte.c.robo_confidence > 0
robo_suggestions_cte.c.robo_confidence > 0,
anon_suggestions_cte.c.anon_count > 0
)
)
)

mappings: Sequence[RowMapping] = await self.mappings(joined_suggestions_query)
suggestions: list[SuggestionModel] = [
SuggestionModel(
**mapping
id=mapping["id"],
display_name=mapping["display_name"],
user_count=mapping['user_count'] + (mapping['anon_count'] // 2),
robo_confidence=mapping["robo_confidence"]
)
for mapping in mappings
]
Expand Down
17 changes: 15 additions & 2 deletions src/api/endpoints/annotate/all/get/queries/name/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from src.api.endpoints.annotate.all.get.models.name import NameAnnotationSuggestion, NameAnnotationResponseOuterInfo
from src.db.helpers.session import session_helper as sh
from src.db.models.impl.annotation.name.anon.sqlalchemy import AnnotationNameAnonEndorsement
from src.db.models.impl.annotation.name.suggestion.enums import NameSuggestionSource
from src.db.models.impl.annotation.name.suggestion.sqlalchemy import AnnotationNameSuggestion
from src.db.models.impl.annotation.name.user.sqlalchemy import AnnotationNameUserEndorsement
Expand All @@ -28,6 +29,9 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo:
func.count(
AnnotationNameUserEndorsement.user_id
).label('user_count'),
func.count(
AnnotationNameAnonEndorsement.session_id
).label('anon_count'),
case(
(AnnotationNameSuggestion.source == NameSuggestionSource.HTML_METADATA_TITLE, 1),
else_=0
Expand All @@ -37,6 +41,10 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo:
AnnotationNameUserEndorsement,
AnnotationNameUserEndorsement.suggestion_id == AnnotationNameSuggestion.id,
)
.outerjoin(
AnnotationNameAnonEndorsement,
AnnotationNameAnonEndorsement.suggestion_id == AnnotationNameSuggestion.id,
)
.where(
AnnotationNameSuggestion.url_id == self.url_id,
)
Expand All @@ -45,7 +53,9 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo:
AnnotationNameSuggestion.suggestion,
)
.order_by(
func.count(AnnotationNameUserEndorsement.user_id).desc(),
(func.count(AnnotationNameUserEndorsement.user_id) + func.count(
AnnotationNameUserEndorsement.user_id
)).desc(),
AnnotationNameSuggestion.id.asc(),
)
.limit(3)
Expand All @@ -54,7 +64,10 @@ async def run(self, session: AsyncSession) -> NameAnnotationResponseOuterInfo:
mappings: Sequence[RowMapping] = await sh.mappings(session, query=query)
suggestions = [
NameAnnotationSuggestion(
**mapping
id=mapping["id"],
display_name=mapping["display_name"],
user_count=mapping['user_count'] + (mapping['anon_count'] // 2),
robo_count=mapping["robo_count"]
)
for mapping in mappings
]
Expand Down
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
2 changes: 1 addition & 1 deletion src/db/models/impl/agency/sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
References an agency in the data sources database.
"""

from sqlalchemy import Column, Integer, String, DateTime, Sequence
from sqlalchemy import Column, String
from sqlalchemy.orm import relationship, Mapped

from src.db.models.helpers import enum_column
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ class AnnotationAutoURLType(

# Relationships

url = relationship("URL", back_populates="auto_relevant_suggestion")
url = relationship("URL")
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ class AnnotationURLTypeUser(

# Relationships

url = relationship("URL", back_populates="user_relevant_suggestions")
url = relationship("URL")
Loading