Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions src/api/endpoints/annotate/dtos/agency/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from pydantic import BaseModel

from src.api.endpoints.annotate.dtos.shared.base.response import AnnotationInnerResponseInfoBase
from src.core.enums import SuggestionType
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo

class GetNextURLForAgencyAgencyInfo(BaseModel):
suggestion_type: SuggestionType
Expand All @@ -13,13 +13,10 @@
county: Optional[str] = None
locality: Optional[str] = None

class GetNextURLForAgencyAnnotationInnerResponse(BaseModel):
url_id: int
url: str
class GetNextURLForAgencyAnnotationInnerResponse(AnnotationInnerResponseInfoBase):

Check warning on line 16 in src/api/endpoints/annotate/dtos/agency/response.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/agency/response.py#L16 <101>

Missing docstring in public class
Raw output
./src/api/endpoints/annotate/dtos/agency/response.py:16:1: D101 Missing docstring in public class
agency_suggestions: list[
GetNextURLForAgencyAgencyInfo
]
html_info: ResponseHTMLInfo

class GetNextURLForAgencyAnnotationResponse(BaseModel):
next_annotation: Optional[GetNextURLForAgencyAnnotationInnerResponse]
Expand Down
11 changes: 5 additions & 6 deletions src/api/endpoints/annotate/dtos/all/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
from pydantic import Field, BaseModel

from src.api.endpoints.annotate.dtos.agency.response import GetNextURLForAgencyAgencyInfo
from src.api.endpoints.annotate.dtos.shared.base.response import AnnotationInnerResponseInfoBase
from src.core.enums import RecordType
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo


class GetNextURLForAllAnnotationInnerResponse(BaseModel):
url_id: int
url: str
html_info: ResponseHTMLInfo
agency_suggestions: Optional[list[GetNextURLForAgencyAgencyInfo]]
class GetNextURLForAllAnnotationInnerResponse(AnnotationInnerResponseInfoBase):

Check warning on line 10 in src/api/endpoints/annotate/dtos/all/response.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/all/response.py#L10 <101>

Missing docstring in public class
Raw output
./src/api/endpoints/annotate/dtos/all/response.py:10:1: D101 Missing docstring in public class
agency_suggestions: Optional[list[GetNextURLForAgencyAgencyInfo]] = Field(
title="The auto-labeler's suggestions for agencies"
)
suggested_relevant: Optional[bool] = Field(
title="Whether the auto-labeler identified the URL as relevant or not"
)
Expand Down
9 changes: 2 additions & 7 deletions src/api/endpoints/annotate/dtos/record_type/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,16 @@

from pydantic import Field, BaseModel

from src.api.endpoints.annotate.dtos.shared.base.response import AnnotationInnerResponseInfoBase
from src.db.dtos.url_mapping import URLMapping
from src.core.enums import RecordType
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo


class GetNextRecordTypeAnnotationResponseInfo(BaseModel):
url_info: URLMapping = Field(
title="Information about the URL"
)
class GetNextRecordTypeAnnotationResponseInfo(AnnotationInnerResponseInfoBase):

Check warning on line 11 in src/api/endpoints/annotate/dtos/record_type/response.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/record_type/response.py#L11 <101>

Missing docstring in public class
Raw output
./src/api/endpoints/annotate/dtos/record_type/response.py:11:1: D101 Missing docstring in public class
suggested_record_type: Optional[RecordType] = Field(
title="What record type, if any, the auto-labeler identified the URL as"
)
html_info: ResponseHTMLInfo = Field(
title="HTML information about the URL"
)

class GetNextRecordTypeAnnotationResponseOuterInfo(BaseModel):
    """Envelope returned when requesting the next record-type annotation."""

    # The annotation typing allows ``None`` in place of an inner response.
    next_annotation: Optional[GetNextRecordTypeAnnotationResponseInfo]
11 changes: 2 additions & 9 deletions src/api/endpoints/annotate/dtos/relevance/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,13 @@

from pydantic import BaseModel, Field

from src.db.dtos.url_mapping import URLMapping
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo
from src.api.endpoints.annotate.dtos.shared.base.response import AnnotationInnerResponseInfoBase


class GetNextRelevanceAnnotationResponseInfo(BaseModel):
url_info: URLMapping = Field(
title="Information about the URL"
)
class GetNextRelevanceAnnotationResponseInfo(AnnotationInnerResponseInfoBase):

Check warning on line 8 in src/api/endpoints/annotate/dtos/relevance/response.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/relevance/response.py#L8 <101>

Missing docstring in public class
Raw output
./src/api/endpoints/annotate/dtos/relevance/response.py:8:1: D101 Missing docstring in public class
suggested_relevant: Optional[bool] = Field(
title="Whether the auto-labeler identified the URL as relevant or not"
)
html_info: ResponseHTMLInfo = Field(
title="HTML information about the URL"
)

class GetNextRelevanceAnnotationResponseOuterInfo(BaseModel):
    """Envelope returned when requesting the next relevance annotation."""

    # The annotation typing allows ``None`` in place of an inner response.
    next_annotation: Optional[GetNextRelevanceAnnotationResponseInfo]
Empty file.
Empty file.
19 changes: 19 additions & 0 deletions src/api/endpoints/annotate/dtos/shared/base/response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from typing import Optional

Check warning on line 1 in src/api/endpoints/annotate/dtos/shared/base/response.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/shared/base/response.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/annotate/dtos/shared/base/response.py:1:1: D100 Missing docstring in public module

from pydantic import BaseModel, Field

from src.api.endpoints.annotate.dtos.shared.batch import AnnotationBatchInfo
from src.core.tasks.operators.url_html.scraper.parser.dtos.response_html import ResponseHTMLInfo
from src.db.dtos.url_mapping import URLMapping


class AnnotationInnerResponseInfoBase(BaseModel):
    """Fields common to the inner "next annotation" response models.

    The agency, record-type, relevance and all-annotation inner responses
    subclass this model, so each of them carries the same URL, HTML and
    batch information alongside its own suggestion fields.
    """

    # URL id / URL string pair identifying the URL to annotate.
    url_info: URLMapping = Field(
        title="Information about the URL"
    )
    # Parsed HTML details for the URL.
    html_info: ResponseHTMLInfo = Field(
        title="HTML information about the URL"
    )
    # Annotation counts for the batch; the type permits ``None``
    # (e.g. when no batch id was supplied for the request).
    batch_info: Optional[AnnotationBatchInfo] = Field(
        title="Information about the annotation batch"
    )

Check warning on line 19 in src/api/endpoints/annotate/dtos/shared/base/response.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/shared/base/response.py#L19 <292>

no newline at end of file
Raw output
./src/api/endpoints/annotate/dtos/shared/base/response.py:19:6: W292 no newline at end of file
7 changes: 7 additions & 0 deletions src/api/endpoints/annotate/dtos/shared/batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pydantic import BaseModel

Check warning on line 1 in src/api/endpoints/annotate/dtos/shared/batch.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/shared/batch.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/annotate/dtos/shared/batch.py:1:1: D100 Missing docstring in public module


class AnnotationBatchInfo(BaseModel):
    """Annotation progress counts for one batch of URLs."""

    # Number of URLs in the batch counted as already annotated.
    count_annotated: int
    # Overall URL count reported for the batch.
    total_urls: int
    # Number of URLs in the batch counted as not yet annotated.
    count_not_annotated: int

Check warning on line 7 in src/api/endpoints/annotate/dtos/shared/batch.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/dtos/shared/batch.py#L7 <292>

no newline at end of file
Raw output
./src/api/endpoints/annotate/dtos/shared/batch.py:7:29: W292 no newline at end of file
108 changes: 99 additions & 9 deletions src/db/client/async_.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
GetNextURLForAllAnnotationInnerResponse
from src.api.endpoints.annotate.dtos.record_type.response import GetNextRecordTypeAnnotationResponseInfo
from src.api.endpoints.annotate.dtos.relevance.response import GetNextRelevanceAnnotationResponseInfo
from src.api.endpoints.annotate.dtos.shared.batch import AnnotationBatchInfo
from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO
from src.api.endpoints.collector.dtos.manual_batch.response import ManualBatchResponseDTO
from src.api.endpoints.metrics.dtos.get.backlog import GetMetricsBacklogResponseDTO, GetMetricsBacklogResponseInnerDTO
Expand Down Expand Up @@ -68,6 +69,7 @@
from src.core.tasks.operators.url_miscellaneous_metadata.tdo import URLMiscellaneousMetadataTDO, URLHTMLMetadataInfo
from src.core.env_var_manager import EnvVarManager
from src.core.enums import BatchStatus, SuggestionType, RecordType, SuggestedStatus
from src.db.types import UserSuggestionType

# Type Hints

Expand Down Expand Up @@ -257,7 +259,14 @@
url_id=url.id
),
suggested_relevant=suggestion,
html_info=html_response_info
html_info=html_response_info,
batch_info=await self.get_annotation_batch_info(
session,
batch_id=batch_id,
models=[
UserRelevantSuggestion
]
)
)

#endregion relevant
Expand Down Expand Up @@ -298,7 +307,14 @@
url_id=url.id
),
suggested_record_type=suggestion,
html_info=html_response_info
html_info=html_response_info,
batch_info=await self.get_annotation_batch_info(
session,
batch_id=batch_id,
models=[
UserRecordTypeSuggestion,
]
)
)


Expand Down Expand Up @@ -916,10 +932,19 @@

return GetNextURLForAgencyAnnotationResponse(
next_annotation=GetNextURLForAgencyAnnotationInnerResponse(
url_id=url_id,
url=url,
url_info=URLMapping(
url=url,
url_id=url_id
),
html_info=response_html_info,
agency_suggestions=agency_suggestions
agency_suggestions=agency_suggestions,
batch_info=await self.get_annotation_batch_info(
session,
batch_id=batch_id,
models=[
UserUrlAgencySuggestion,
]
)
)
)

Expand Down Expand Up @@ -1747,12 +1772,23 @@

return GetNextURLForAllAnnotationResponse(
next_annotation=GetNextURLForAllAnnotationInnerResponse(
url_id=url.id,
url=url.url,
url_info=URLMapping(
url_id=url.id,
url=url.url
),
html_info=html_response_info,
suggested_relevant=auto_relevant,
suggested_record_type=auto_record_type,
agency_suggestions=agency_suggestions
agency_suggestions=agency_suggestions,
batch_info=await self.get_annotation_batch_info(
session,
batch_id=batch_id,
models=[
UserUrlAgencySuggestion,
UserRecordTypeSuggestion,
UserRelevantSuggestion
]
)
)
)

Expand Down Expand Up @@ -2380,4 +2416,58 @@

raw_result = await session.execute(query)
urls = raw_result.scalars().all()
return [URL404ProbeTDO(url=url.url, url_id=url.id) for url in urls]
return [URL404ProbeTDO(url=url.url, url_id=url.id) for url in urls]

@staticmethod
async def get_annotation_batch_info(
    session: AsyncSession,
    batch_id: Optional[int],
    models: List[UserSuggestionType]
) -> Optional[AnnotationBatchInfo]:
    """Count annotated and not-yet-annotated pending URLs in a batch.

    Only URLs whose outcome is ``URLStatus.PENDING`` and whose ``batch_id``
    matches are counted.

    Args:
        session: Session used to execute the two count queries.
        batch_id: Batch to report on. When ``None`` no query is run.
        models: User-suggestion models that define "annotated". A URL is
            counted as annotated when a suggestion row exists for *every*
            model, and as not annotated when a row exists for *none* of them.

    Returns:
        ``None`` when ``batch_id`` is ``None``; otherwise an
        ``AnnotationBatchInfo`` whose ``total_urls`` is the sum of the two
        counts.
    """
    if batch_id is None:
        return None

    async def count_pending_urls(suggestion_filters: list) -> int:
        # Both counts share the pending/batch restriction and differ only
        # in the per-model EXISTS / NOT EXISTS filters appended to it.
        query = select(func.count(URL.id)).where(
            URL.outcome == URLStatus.PENDING.value,
            URL.batch_id == batch_id,
            *suggestion_filters,
        )
        result = await session.execute(query)
        # An aggregate COUNT yields exactly one row, so scalar_one() gives an
        # int rather than the Optional that one_or_none() is typed to return.
        return result.scalar_one()

    count_annotated = await count_pending_urls(
        [StatementComposer.user_suggestion_exists(model) for model in models]
    )
    count_not_annotated = await count_pending_urls(
        [StatementComposer.user_suggestion_not_exists(model) for model in models]
    )

    # NOTE(review): with more than one model, a URL that has suggestions for
    # some but not all models matches neither query, so it is left out of
    # total_urls as well -- confirm this is the intended definition.
    return AnnotationBatchInfo(
        count_annotated=count_annotated,
        count_not_annotated=count_not_annotated,
        total_urls=count_annotated + count_not_annotated
    )

Check warning on line 2473 in src/db/client/async_.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/db/client/async_.py#L2473 <292>

no newline at end of file
Raw output
./src/db/client/async_.py:2473:10: W292 no newline at end of file
20 changes: 14 additions & 6 deletions src/db/statement_composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from src.db.models.core import URL, URLHTMLContent, AutomatedUrlAgencySuggestion, URLOptionalDataSourceMetadata, Batch, \
ConfirmedURLAgency, LinkTaskURL, Task, UserUrlAgencySuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion
from src.core.enums import BatchStatus
from src.db.types import UserSuggestionType


class StatementComposer:
Expand Down Expand Up @@ -96,6 +97,18 @@

return query

@staticmethod
def user_suggestion_exists(
    model_to_include: UserSuggestionType
) -> ColumnElement[bool]:
    """Build an EXISTS condition for URLs that have a given user suggestion.

    Args:
        model_to_include: User-suggestion model whose ``url_id`` column is
            compared against ``URL.id``.

    Returns:
        An ``exists(...)`` expression over ``model_to_include`` rows whose
        ``url_id`` equals ``URL.id``; intended to be placed in the WHERE
        clause of a query that selects from ``URL``.
    """
    return exists(
        select(model_to_include)
        .where(
            model_to_include.url_id == URL.id,
        )
    )


@staticmethod
def user_suggestion_not_exists(
Expand All @@ -106,12 +119,7 @@
#

subquery = not_(
exists(
select(model_to_exclude)
.where(
model_to_exclude.url_id == URL.id,
)
)
StatementComposer.user_suggestion_exists(model_to_exclude)

Check failure on line 122 in src/db/statement_composer.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/db/statement_composer.py#L122 <126>

continuation line over-indented for hanging indent
Raw output
./src/db/statement_composer.py:122:21: E126 continuation line over-indented for hanging indent
)

return subquery
Expand Down
3 changes: 3 additions & 0 deletions src/db/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from src.db.models.core import UserUrlAgencySuggestion, UserRecordTypeSuggestion, UserRelevantSuggestion

Check warning on line 1 in src/db/types.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/db/types.py#L1 <100>

Missing docstring in public module
Raw output
./src/db/types.py:1:1: D100 Missing docstring in public module

# Union of the three user-suggestion models from src.db.models.core, used as a
# shared annotation wherever any one of them is accepted.
# NOTE(review): the call sites in this change pass the model classes themselves
# (not instances) under this annotation -- confirm whether ``type[...]`` of each
# model was intended.
UserSuggestionType = UserUrlAgencySuggestion | UserRelevantSuggestion | UserRecordTypeSuggestion

Check warning on line 3 in src/db/types.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/db/types.py#L3 <292>

no newline at end of file
Raw output
./src/db/types.py:3:97: W292 no newline at end of file
14 changes: 7 additions & 7 deletions tests/automated/integration/api/test_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ async def test_annotate_agency_multiple_auto_suggestions(api_test_helper):
assert response.next_annotation
next_annotation = response.next_annotation
# Check that url_id matches the one we inserted
assert next_annotation.url_id == buci.url_ids[0]
assert next_annotation.url_info.url_id == buci.url_ids[0]

# Check that html data is present
assert next_annotation.html_info.description != ""
Expand Down Expand Up @@ -448,7 +448,7 @@ async def test_annotate_agency_multiple_auto_suggestions_no_html(api_test_helper
assert response.next_annotation
next_annotation = response.next_annotation
# Check that url_id matches the one we inserted
assert next_annotation.url_id == buci.url_ids[0]
assert next_annotation.url_info.url_id == buci.url_ids[0]

# Check that html data is not present
assert next_annotation.html_info.description == ""
Expand Down Expand Up @@ -476,7 +476,7 @@ async def test_annotate_agency_single_unknown_auto_suggestion(api_test_helper):
assert response.next_annotation
next_annotation = response.next_annotation
# Check that url_id matches the one we inserted
assert next_annotation.url_id == buci.url_ids[0]
assert next_annotation.url_info.url_id == buci.url_ids[0]

# Check that html data is present
assert next_annotation.html_info.description != ""
Expand Down Expand Up @@ -532,7 +532,7 @@ async def test_annotate_agency_other_user_annotation(api_test_helper):
assert response.next_annotation
next_annotation = response.next_annotation
# Check that url_id matches the one we inserted
assert next_annotation.url_id == url_ids[0]
assert next_annotation.url_info.url_id == url_ids[0]

# Check that html data is present
assert next_annotation.html_info.description != ""
Expand Down Expand Up @@ -645,7 +645,7 @@ async def test_annotate_all(api_test_helper):
batch_id=setup_info_2.batch_id
)

assert get_response_1.next_annotation.url_id != get_response_2.next_annotation.url_id
assert get_response_1.next_annotation.url_info.url_id != get_response_2.next_annotation.url_info.url_id

# Annotate the first and submit
agency_id = await ath.db_data_creator.agency()
Expand All @@ -663,7 +663,7 @@ async def test_annotate_all(api_test_helper):
assert post_response_1.next_annotation is not None

# Confirm the second is received
assert post_response_1.next_annotation.url_id == url_mapping_2.url_id
assert post_response_1.next_annotation.url_info.url_id == url_mapping_2.url_id

# Upon submitting the second, confirm that no more URLs are returned through either POST or GET
post_response_2 = await ath.request_validator.post_all_annotations_and_get_next(
Expand Down Expand Up @@ -729,7 +729,7 @@ async def test_annotate_all_post_batch_filtering(api_test_helper):
)
)

assert post_response_1.next_annotation.url_id == url_mapping_3.url_id
assert post_response_1.next_annotation.url_info.url_id == url_mapping_3.url_id


@pytest.mark.asyncio
Expand Down
Loading