Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 7 additions & 24 deletions collector_db/AsyncDatabaseClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,19 +1065,6 @@ def count_subquery(model: Type[Base]):
)
)

count_subqueries = [
count_subquery(model=model)
for model in models
]

sum_of_count_subqueries = (
sum(
[
coalesce(subquery.c.count, 0)
for subquery in count_subqueries
]
)
)

# Basic URL query
url_query = (
Expand All @@ -1086,13 +1073,10 @@ def count_subquery(model: Type[Base]):
(
sum_of_exist_subqueries
).label("total_distinct_annotation_count"),
(
sum_of_count_subqueries
).label("total_overall_annotation_count")
)
)

for subquery in (exist_subqueries + count_subqueries):
for subquery in exist_subqueries:
url_query = url_query.outerjoin(
subquery, URL.id == subquery.c.url_id
)
Expand All @@ -1110,8 +1094,8 @@ def count_subquery(model: Type[Base]):
URL.html_content,
URL.auto_record_type_suggestion,
URL.auto_relevant_suggestion,
URL.user_relevant_suggestions,
URL.user_record_type_suggestions,
URL.user_relevant_suggestion,
URL.user_record_type_suggestion,
URL.optional_data_source_metadata,
]

Expand All @@ -1122,7 +1106,7 @@ def count_subquery(model: Type[Base]):
# The below relationships are joined to entities that are joined to the URL
double_join_relationships = [
(URL.automated_agency_suggestions, AutomatedUrlAgencySuggestion.agency),
(URL.user_agency_suggestions, UserUrlAgencySuggestion.agency),
(URL.user_agency_suggestion, UserUrlAgencySuggestion.agency),
(URL.confirmed_agencies, ConfirmedURLAgency.agency)
]
for primary, secondary in double_join_relationships:
Expand All @@ -1134,7 +1118,6 @@ def count_subquery(model: Type[Base]):
# Apply order clause
url_query = url_query.order_by(
desc("total_distinct_annotation_count"),
desc("total_overall_annotation_count"),
asc(URL.id)
)

Expand Down Expand Up @@ -1173,16 +1156,16 @@ def count_subquery(model: Type[Base]):
description=result.description,
annotations=FinalReviewAnnotationInfo(
relevant=DTOConverter.final_review_annotation_relevant_info(
user_suggestions=result.user_relevant_suggestions,
user_suggestion=result.user_relevant_suggestion,
auto_suggestion=result.auto_relevant_suggestion
),
record_type=DTOConverter.final_review_annotation_record_type_info(
user_suggestions=result.user_record_type_suggestions,
user_suggestion=result.user_record_type_suggestion,
auto_suggestion=result.auto_record_type_suggestion
),
agency=DTOConverter.final_review_annotation_agency_info(
automated_agency_suggestions=result.automated_agency_suggestions,
user_agency_suggestions=result.user_agency_suggestions,
user_agency_suggestion=result.user_agency_suggestion,
confirmed_agencies=result.confirmed_agencies
)
),
Expand Down
78 changes: 28 additions & 50 deletions collector_db/DTOConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@

from collector_db.DTOs.URLHTMLContentInfo import HTMLContentType, URLHTMLContentInfo
from collector_db.DTOs.URLWithHTML import URLWithHTML
from collector_db.enums import ValidationStatus, ValidationSource, URLMetadataAttributeType
from collector_db.models import AutomatedUrlAgencySuggestion, UserUrlAgencySuggestion, URLHTMLContent, URL, Agency, \
AutoRecordTypeSuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion, AutoRelevantSuggestion, \
ConfirmedURLAgency
from core.DTOs.GetNextURLForAgencyAnnotationResponse import GetNextURLForAgencyAgencyInfo
from core.DTOs.GetNextURLForFinalReviewResponse import FinalReviewAnnotationRelevantInfo, \
FinalReviewAnnotationRelevantUsersInfo, FinalReviewAnnotationRecordTypeInfo, FinalReviewAnnotationAgencyAutoInfo, \
FinalReviewAnnotationAgencyInfo, FinalReviewAnnotationAgencyUserInfo
FinalReviewAnnotationRecordTypeInfo, FinalReviewAnnotationAgencyAutoInfo, \
FinalReviewAnnotationAgencyInfo
from core.enums import RecordType, SuggestionType
from html_tag_collector.DataClassTags import convert_to_response_html_info, ResponseHTMLInfo, ENUM_TO_ATTRIBUTE_MAPPING
from html_tag_collector.DataClassTags import ResponseHTMLInfo, ENUM_TO_ATTRIBUTE_MAPPING

class DTOConverter:

Expand All @@ -21,49 +20,35 @@ class DTOConverter:

@staticmethod
def final_review_annotation_relevant_info(
user_suggestions: list[UserRelevantSuggestion],
user_suggestion: UserRelevantSuggestion,
auto_suggestion: AutoRelevantSuggestion
) -> FinalReviewAnnotationRelevantInfo:

auto_value = auto_suggestion.relevant if auto_suggestion else None

relevant_count = 0
not_relevant_count = 0
for suggestion in user_suggestions:
if suggestion.relevant:
relevant_count += 1
else:
not_relevant_count += 1
user_value = user_suggestion.relevant if user_suggestion else None
return FinalReviewAnnotationRelevantInfo(
auto=auto_value,
users=FinalReviewAnnotationRelevantUsersInfo(
relevant=relevant_count,
not_relevant=not_relevant_count
)
user=user_value
)

@staticmethod
def final_review_annotation_record_type_info(
user_suggestions: list[UserRecordTypeSuggestion],
user_suggestion: UserRecordTypeSuggestion,
auto_suggestion: AutoRecordTypeSuggestion
):

user_count = {}
if auto_suggestion is None:
auto_value = None
else:
auto_value = RecordType(auto_suggestion.record_type)
for suggestion in user_suggestions:
value = RecordType(suggestion.record_type)
if value not in user_count:
user_count[value] = 0
user_count[value] += 1
# Sort users by count, descending
user_count = dict(sorted(user_count.items(), key=lambda x: x[1], reverse=True))
if user_suggestion is None:
user_value = None
else:
user_value = RecordType(user_suggestion.record_type)

return FinalReviewAnnotationRecordTypeInfo(
auto=auto_value,
users=user_count
user=user_value
)

@staticmethod
Expand Down Expand Up @@ -109,27 +94,20 @@ def final_review_annotation_agency_auto_info(

@staticmethod
def user_url_agency_suggestion_to_final_review_annotation_agency_user_info(
user_url_agency_suggestions: list[UserUrlAgencySuggestion]
) -> dict[int, FinalReviewAnnotationAgencyUserInfo]:
d = {}
for suggestion in user_url_agency_suggestions:
agency = suggestion.agency
agency_id = agency.agency_id
if agency.agency_id not in d:
d[agency_id] = FinalReviewAnnotationAgencyUserInfo(
suggestion_type=SuggestionType.MANUAL_SUGGESTION,
agency_name=agency.name,
pdap_agency_id=agency_id,
state=agency.state,
county=agency.county,
locality=agency.locality,
count=1
)
else:
d[agency_id].count += 1
user_url_agency_suggestion: UserUrlAgencySuggestion
) -> Optional[GetNextURLForAgencyAgencyInfo]:
suggestion = user_url_agency_suggestion
if suggestion is None:
return None
return GetNextURLForAgencyAgencyInfo(
suggestion_type=SuggestionType.MANUAL_SUGGESTION,
pdap_agency_id=suggestion.agency_id,
agency_name=suggestion.agency.name,
state=suggestion.agency.state,
county=suggestion.agency.county,
locality=suggestion.agency.locality
)

# Return sorted
return dict(sorted(d.items(), key=lambda x: x[1].count, reverse=True))

@staticmethod
def confirmed_agencies_to_final_review_annotation_agency_info(
Expand All @@ -154,7 +132,7 @@ def confirmed_agencies_to_final_review_annotation_agency_info(
def final_review_annotation_agency_info(
automated_agency_suggestions: list[AutomatedUrlAgencySuggestion],
confirmed_agencies: list[ConfirmedURLAgency],
user_agency_suggestions: list[UserUrlAgencySuggestion]
user_agency_suggestion: UserUrlAgencySuggestion
):

confirmed_agency_info = DTOConverter.confirmed_agencies_to_final_review_annotation_agency_info(
Expand All @@ -166,12 +144,12 @@ def final_review_annotation_agency_info(
)

agency_user_info = DTOConverter.user_url_agency_suggestion_to_final_review_annotation_agency_user_info(
user_agency_suggestions
user_agency_suggestion
)

return FinalReviewAnnotationAgencyInfo(
confirmed=confirmed_agency_info,
users=agency_user_info,
user=agency_user_info,
auto=agency_auto_info
)

Expand Down
18 changes: 9 additions & 9 deletions collector_db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,16 @@
)
automated_agency_suggestions = relationship(
"AutomatedUrlAgencySuggestion", back_populates="url")
user_agency_suggestions = relationship(
"UserUrlAgencySuggestion", back_populates="url")
user_agency_suggestion = relationship(
"UserUrlAgencySuggestion", uselist=False, back_populates="url")
auto_record_type_suggestion = relationship(
"AutoRecordTypeSuggestion", uselist=False, back_populates="url")
user_record_type_suggestions = relationship(
"UserRecordTypeSuggestion", back_populates="url")
user_record_type_suggestion = relationship(
"UserRecordTypeSuggestion", uselist=False, back_populates="url")
auto_relevant_suggestion = relationship(
"AutoRelevantSuggestion", uselist=False, back_populates="url")
user_relevant_suggestions = relationship(
"UserRelevantSuggestion", back_populates="url")
user_relevant_suggestion = relationship(
"UserRelevantSuggestion", uselist=False, back_populates="url")
reviewing_user = relationship(
"ReviewingUserURL", uselist=False, back_populates="url")
optional_data_source_metadata = relationship(
Expand Down Expand Up @@ -375,7 +375,7 @@
is_new = Column(Boolean, nullable=True)

agency = relationship("Agency", back_populates="user_suggestions")
url = relationship("URL", back_populates="user_agency_suggestions")
url = relationship("URL", back_populates="user_agency_suggestion")

__table_args__ = (
UniqueConstraint("agency_id", "url_id", "user_id", name="uq_user_url_agency_suggestions"),
Expand Down Expand Up @@ -432,7 +432,7 @@

# Relationships

url = relationship("URL", back_populates="user_relevant_suggestions")
url = relationship("URL", back_populates="user_relevant_suggestion")


class UserRecordTypeSuggestion(Base):
Expand All @@ -451,4 +451,4 @@

# Relationships

url = relationship("URL", back_populates="user_record_type_suggestions")
url = relationship("URL", back_populates="user_record_type_suggestion")

Check warning on line 454 in collector_db/models.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] collector_db/models.py#L454 <292>

no newline at end of file
Raw output
./collector_db/models.py:454:76: W292 no newline at end of file
23 changes: 9 additions & 14 deletions core/DTOs/GetNextURLForFinalReviewResponse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,21 @@
from core.enums import RecordType
from html_tag_collector.DataClassTags import ResponseHTMLInfo


class FinalReviewAnnotationRelevantUsersInfo(BaseModel):
relevant: int = Field(title="Number of users who marked the URL as relevant")
not_relevant: int = Field(title="Number of users who marked the URL as not relevant")

class FinalReviewAnnotationRelevantInfo(BaseModel):
auto: Optional[bool] = Field(title="Whether the auto-labeler has marked the URL as relevant")
users: FinalReviewAnnotationRelevantUsersInfo = Field(
title="How users identified the relevancy of the source",
user: Optional[bool] = Field(
title="Whether a user has marked the URL as relevant",
)

class FinalReviewAnnotationRecordTypeInfo(BaseModel):
auto: Optional[RecordType] = Field(title="The record type suggested by the auto-labeler")
users: dict[RecordType, int] = Field(
title="A dictionary, sorted by size and omitting zero values, of all record types suggested by users",
auto: Optional[RecordType] = Field(
title="The record type suggested by the auto-labeler"
)
user: Optional[RecordType] = Field(
title="The record type suggested by a user",
)

# region Agency
class FinalReviewAnnotationAgencyUserInfo(GetNextURLForAgencyAgencyInfo):
count: int = Field(title="Number of times suggested by users")

class FinalReviewAnnotationAgencyAutoInfo(BaseModel):
unknown: bool = Field(title="Whether the auto-labeler suggested the URL as unknown")
Expand All @@ -39,8 +34,8 @@ class FinalReviewAnnotationAgencyInfo(BaseModel):
)
auto: Optional[FinalReviewAnnotationAgencyAutoInfo] = Field(
title="A single agency or a list of agencies suggested by the auto-labeler",)
users: Optional[dict[int, FinalReviewAnnotationAgencyUserInfo]] = Field(
title="A list, sorted by size, of all agencies suggested by users",
user: Optional[GetNextURLForAgencyAgencyInfo] = Field(
title="A single agency suggested by a user",
)
# endregion

Expand Down
50 changes: 26 additions & 24 deletions tests/helpers/complex_test_data_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ async def setup_for_annotate_agency(
class FinalReviewSetupInfo(BaseModel):
batch_id: int
url_mapping: URLMapping
user_agency_id: Optional[int]

async def setup_for_get_next_url_for_final_review(
db_data_creator: DBDataCreator,
Expand All @@ -78,27 +79,25 @@ async def setup_for_get_next_url_for_final_review(
await db_data_creator.url_miscellaneous_metadata(url_id=url_mapping.url_id)
await db_data_creator.html_data([url_mapping.url_id])

async def add_agency_suggestion(count: int):
async def add_agency_suggestion() -> int:
agency_id = await db_data_creator.agency()
for i in range(count):
await db_data_creator.agency_user_suggestions(
url_id=url_mapping.url_id,
agency_id=agency_id
)

async def add_record_type_suggestion(count: int, record_type: RecordType):
for i in range(count):
await db_data_creator.user_record_type_suggestion(
url_id=url_mapping.url_id,
record_type=record_type
)

async def add_relevant_suggestion(count: int, relevant: bool):
for i in range(count):
await db_data_creator.user_relevant_suggestion(
url_id=url_mapping.url_id,
relevant=relevant
)
await db_data_creator.agency_user_suggestions(
url_id=url_mapping.url_id,
agency_id=agency_id
)
return agency_id

async def add_record_type_suggestion(record_type: RecordType):
await db_data_creator.user_record_type_suggestion(
url_id=url_mapping.url_id,
record_type=record_type
)

async def add_relevant_suggestion(relevant: bool):
await db_data_creator.user_relevant_suggestion(
url_id=url_mapping.url_id,
relevant=relevant
)

await db_data_creator.auto_relevant_suggestions(
url_id=url_mapping.url_id,
Expand All @@ -111,11 +110,14 @@ async def add_relevant_suggestion(count: int, relevant: bool):
)

if include_user_annotations:
await add_relevant_suggestion(1, False)
await add_record_type_suggestion(1, RecordType.ACCIDENT_REPORTS)
await add_agency_suggestion(1)
await add_relevant_suggestion(False)
await add_record_type_suggestion(RecordType.ACCIDENT_REPORTS)
user_agency_id = await add_agency_suggestion()
else:
user_agency_id = None

return FinalReviewSetupInfo(
batch_id=batch_id,
url_mapping=url_mapping
url_mapping=url_mapping,
user_agency_id=user_agency_id
)
Loading