Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Set user annotation tables to allow only one annotation per url

Revision ID: 997f5bf53772
Revises: ed06a5633d2e
Create Date: 2025-04-16 19:54:59.798580

"""
from typing import Sequence, Union

from alembic import op


# revision identifiers, used by Alembic.
revision: str = '997f5bf53772'
down_revision: Union[str, None] = 'ed06a5633d2e'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Restrict each user suggestion table to one annotation per URL.

    For every suggestion table, rows sharing a ``url_id`` are reduced to the
    single row with the lowest ``id``; a unique constraint on ``url_id`` is
    then added. The deleted rows are not recoverable through ``downgrade``.
    """
    # (table name, unique constraint name) pairs. The agency constraint name
    # intentionally omits "url" -- downgrade() drops it under this exact name.
    suggestion_tables = (
        ('user_relevant_suggestions', 'uq_user_relevant_suggestions_url_id'),
        ('user_record_type_suggestions', 'uq_user_record_type_suggestions_url_id'),
        ('user_url_agency_suggestions', 'uq_user_agency_suggestions_url_id'),
    )

    # Delete entries with more than one annotation per URL *before* adding
    # the constraints; otherwise constraint creation fails on existing
    # duplicates. The agency table is de-duplicated as well, since it also
    # receives a unique constraint.
    # NOTE(review): assumes user_url_agency_suggestions has an `id` column
    # like the other two suggestion tables -- confirm against the model.
    # Table names are hard-coded constants above, so interpolating them into
    # the statement does not expose untrusted input.
    for table_name, _ in suggestion_tables:
        op.execute(f"""
            WITH ranked AS (
                SELECT
                    id,
                    ROW_NUMBER() OVER (PARTITION BY url_id ORDER BY id) AS rn
                FROM {table_name}
            )
            DELETE FROM {table_name}
            USING ranked
            WHERE {table_name}.id = ranked.id
              AND ranked.rn > 1
        """)

    # Add unique constraint to url_id column
    for table_name, constraint_name in suggestion_tables:
        op.create_unique_constraint(constraint_name, table_name, ['url_id'])


def downgrade() -> None:
    """Drop the unique ``url_id`` constraints from the user suggestion tables.

    Only the constraints are removed; this function does not re-insert any
    rows.
    """
    # (constraint name, table name) pairs, dropped in the listed order.
    unique_constraints = (
        ('uq_user_relevant_suggestions_url_id', 'user_relevant_suggestions'),
        ('uq_user_record_type_suggestions_url_id', 'user_record_type_suggestions'),
        ('uq_user_agency_suggestions_url_id', 'user_url_agency_suggestions'),
    )
    for constraint_name, table_name in unique_constraints:
        op.drop_constraint(constraint_name, table_name, type_='unique')

Check warning on line 61 in alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py#L61 <292>

no newline at end of file
Raw output
./alembic/versions/2025_04_16_1954-997f5bf53772_set_user_annotation_tables_to_allow_.py:61:107: W292 no newline at end of file
9 changes: 3 additions & 6 deletions collector_db/AsyncDatabaseClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,13 @@ async def get_next_url_for_user_annotation(
URL,
)
.where(URL.outcome == URLStatus.PENDING.value)
# URL must not have metadata annotation by this user
# URL must not have user suggestion
.where(
not_(
exists(
select(user_suggestion_model_to_exclude)
.where(
user_suggestion_model_to_exclude.url_id == URL.id,
user_suggestion_model_to_exclude.user_id == user_id
)
)
)
Expand All @@ -158,7 +157,6 @@ async def get_next_url_for_user_annotation(
select(UserRelevantSuggestion)
.where(
UserRelevantSuggestion.url_id == URL.id,
UserRelevantSuggestion.user_id == user_id,
UserRelevantSuggestion.relevant == False
)
)
Expand Down Expand Up @@ -833,15 +831,14 @@ async def get_next_url_agency_for_annotation(
if batch_id is not None:
statement = statement.where(URL.batch_id == batch_id)

# Must not have been annotated by this user
# Must not have been annotated by a user
statement = (
statement.join(UserUrlAgencySuggestion, isouter=True)
.where(
~exists(
select(UserUrlAgencySuggestion).
where(
(UserUrlAgencySuggestion.user_id == user_id) &
(UserUrlAgencySuggestion.url_id == URL.id)
UserUrlAgencySuggestion.url_id == URL.id
).
correlate(URL)
)
Expand Down
2 changes: 2 additions & 0 deletions core/DTOs/GetNextURLForFinalReviewResponse.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class FinalReviewAnnotationRecordTypeInfo(BaseModel):
title="A dictionary, sorted by size and omitting zero values, of all record types suggested by users",
)

# region Agency
class FinalReviewAnnotationAgencyUserInfo(GetNextURLForAgencyAgencyInfo):
count: int = Field(title="Number of times suggested by users")

Expand All @@ -41,6 +42,7 @@ class FinalReviewAnnotationAgencyInfo(BaseModel):
users: Optional[dict[int, FinalReviewAnnotationAgencyUserInfo]] = Field(
title="A list, sorted by size, of all agencies suggested by users",
)
# endregion

class FinalReviewAnnotationInfo(BaseModel):
relevant: FinalReviewAnnotationRelevantInfo = Field(
Expand Down
14 changes: 7 additions & 7 deletions core/TaskManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
from core.DTOs.GetTasksResponse import GetTasksResponse
from core.DTOs.TaskOperatorRunInfo import TaskOperatorRunInfo, TaskOperatorOutcome
from core.FunctionTrigger import FunctionTrigger
from core.classes.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator
from core.classes.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator
from core.classes.TaskOperatorBase import TaskOperatorBase
from core.classes.URLHTMLTaskOperator import URLHTMLTaskOperator
from core.classes.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator
from core.classes.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator
from core.classes.URLRelevanceHuggingfaceTaskOperator import URLRelevanceHuggingfaceTaskOperator
from core.classes.task_operators.AgencyIdentificationTaskOperator import AgencyIdentificationTaskOperator
from core.classes.task_operators.SubmitApprovedURLTaskOperator import SubmitApprovedURLTaskOperator
from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase
from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator
from core.classes.task_operators.URLMiscellaneousMetadataTaskOperator import URLMiscellaneousMetadataTaskOperator
from core.classes.task_operators.URLRecordTypeTaskOperator import URLRecordTypeTaskOperator
from core.classes.task_operators.URLRelevanceHuggingfaceTaskOperator import URLRelevanceHuggingfaceTaskOperator
from core.enums import BatchStatus
from html_tag_collector.ResponseParser import HTMLResponseParser
from html_tag_collector.URLRequestInterface import URLRequestInterface
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collector_manager.enums import CollectorType
from core.DTOs.URLAgencySuggestionInfo import URLAgencySuggestionInfo
from core.DTOs.task_data_objects.AgencyIdentificationTDO import AgencyIdentificationTDO
from core.classes.TaskOperatorBase import TaskOperatorBase
from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase
from core.classes.subtasks.AutoGooglerAgencyIdentificationSubtask import AutoGooglerAgencyIdentificationSubtask
from core.classes.subtasks.CKANAgencyIdentificationSubtask import CKANAgencyIdentificationSubtask
from core.classes.subtasks.CommonCrawlerAgencyIdentificationSubtask import CommonCrawlerAgencyIdentificationSubtask
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo
from collector_db.enums import TaskType
from core.DTOs.task_data_objects.SubmitApprovedURLTDO import SubmitApprovedURLTDO
from core.classes.TaskOperatorBase import TaskOperatorBase
from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase
from pdap_api_client.PDAPClient import PDAPClient


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collector_db.enums import TaskType
from core.DTOs.task_data_objects.UrlHtmlTDO import UrlHtmlTDO
from core.classes.HTMLContentInfoGetter import HTMLContentInfoGetter
from core.classes.TaskOperatorBase import TaskOperatorBase
from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase
from html_tag_collector.ResponseParser import HTMLResponseParser
from html_tag_collector.URLRequestInterface import URLRequestInterface

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collector_db.enums import TaskType
from collector_manager.enums import CollectorType
from core.DTOs.task_data_objects.URLMiscellaneousMetadataTDO import URLMiscellaneousMetadataTDO
from core.classes.TaskOperatorBase import TaskOperatorBase
from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase
from core.classes.subtasks.MiscellaneousMetadata.AutoGooglerMiscMetadataSubtask import AutoGooglerMiscMetadataSubtask
from core.classes.subtasks.MiscellaneousMetadata.CKANMiscMetadataSubtask import CKANMiscMetadataSubtask
from core.classes.subtasks.MiscellaneousMetadata.MiscellaneousMetadataSubtaskBase import \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collector_db.DTOs.URLErrorInfos import URLErrorPydanticInfo
from collector_db.enums import TaskType
from core.DTOs.task_data_objects.URLRecordTypeTDO import URLRecordTypeTDO
from core.classes.TaskOperatorBase import TaskOperatorBase
from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase
from core.enums import RecordType
from llm_api_logic.OpenAIRecordClassifier import OpenAIRecordClassifier

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from collector_db.AsyncDatabaseClient import AsyncDatabaseClient
from collector_db.DTOs.URLMetadataInfo import URLMetadataInfo
from collector_db.DTOs.URLWithHTML import URLWithHTML
from collector_db.enums import URLMetadataAttributeType, ValidationStatus, ValidationSource, TaskType
from collector_db.enums import TaskType
from core.DTOs.task_data_objects.URLRelevanceHuggingfaceTDO import URLRelevanceHuggingfaceTDO
from core.classes.TaskOperatorBase import TaskOperatorBase
from core.classes.task_operators.TaskOperatorBase import TaskOperatorBase
from hugging_face.HuggingFaceInterface import HuggingFaceInterface


Expand Down
Empty file.
13 changes: 4 additions & 9 deletions tests/helpers/complex_test_data_functions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional

Check warning on line 1 in tests/helpers/complex_test_data_functions.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/helpers/complex_test_data_functions.py#L1 <100>

Missing docstring in public module
Raw output
./tests/helpers/complex_test_data_functions.py:1:1: D100 Missing docstring in public module

from pydantic import BaseModel

from collector_db.DTOs.InsertURLsInfo import InsertURLsInfo
Expand Down Expand Up @@ -57,7 +59,7 @@

async def setup_for_get_next_url_for_final_review(
db_data_creator: DBDataCreator,
annotation_count: int,
annotation_count: Optional[int] = None,

Check warning on line 62 in tests/helpers/complex_test_data_functions.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] tests/helpers/complex_test_data_functions.py#L62 <100>

Unused argument 'annotation_count'
Raw output
./tests/helpers/complex_test_data_functions.py:62:9: U100 Unused argument 'annotation_count'
include_user_annotations: bool = True,
include_miscellaneous_metadata: bool = True
) -> FinalReviewSetupInfo:
Expand Down Expand Up @@ -109,16 +111,9 @@
)

if include_user_annotations:
await add_relevant_suggestion(annotation_count, True)
await add_relevant_suggestion(1, False)
await add_record_type_suggestion(3, RecordType.ARREST_RECORDS)
await add_record_type_suggestion(2, RecordType.DISPATCH_RECORDINGS)
await add_record_type_suggestion(1, RecordType.ACCIDENT_REPORTS)

if include_user_annotations:
# Add user suggestions for agencies, one suggested by 3 users, another by 2, another by 1
for i in range(annotation_count):
await add_agency_suggestion(i + 1)
await add_agency_suggestion(1)

return FinalReviewSetupInfo(
batch_id=batch_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from collector_db.AsyncDatabaseClient import AsyncDatabaseClient
from collector_db.DTOs.URLInfo import URLInfo
from core.classes.URLHTMLTaskOperator import URLHTMLTaskOperator
from core.classes.task_operators.URLHTMLTaskOperator import URLHTMLTaskOperator
from helpers.DBDataCreator import DBDataCreator
from html_tag_collector.ResponseParser import HTMLResponseParser
from html_tag_collector.RootURLCache import RootURLCache
Expand Down
20 changes: 13 additions & 7 deletions tests/test_automated/integration/api/test_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ async def test_annotate_relevancy(api_test_helper):
# Validate that the correct relevant value is returned
assert inner_info_1.suggested_relevant is True

# A second user should see the same URL


# Annotate with value 'False' and get next URL
request_info_2: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next(
url_id=inner_info_1.url_info.url_id,
Expand Down Expand Up @@ -106,7 +109,6 @@ async def test_annotate_relevancy(api_test_helper):
assert result_2.relevant is True

# If a user submits another annotation for the same URL, the existing annotation should be overwritten

request_info_4: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.post_relevance_annotation_and_get_next(
url_id=inner_info_1.url_info.url_id,
relevance_annotation_post_info=RelevanceAnnotationPostInfo(
Expand Down Expand Up @@ -420,12 +422,6 @@ async def test_annotate_agency_other_user_annotation(api_test_helper):
)
url_ids = setup_info.url_ids


await ath.db_data_creator.manual_suggestion(
user_id=MOCK_USER_ID + 1,
url_id=url_ids[0],
)

response = await ath.request_validator.get_next_agency_annotation()

assert response.next_annotation
Expand All @@ -440,6 +436,16 @@ async def test_annotate_agency_other_user_annotation(api_test_helper):
# Check that one agency_suggestion exists
assert len(next_annotation.agency_suggestions) == 1

# Test that another user can insert a suggestion
await ath.db_data_creator.manual_suggestion(
user_id=MOCK_USER_ID + 1,
url_id=url_ids[0],
)

# After this, test that our user does not receive this URL
response = await ath.request_validator.get_next_agency_annotation()
assert response.next_annotation is None

@pytest.mark.asyncio
async def test_annotate_agency_submit_and_get_next(api_test_helper):
"""
Expand Down
10 changes: 2 additions & 8 deletions tests/test_automated/integration/api/test_review.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ async def test_review_next_source(api_test_helper):

setup_info = await setup_for_get_next_url_for_final_review(
db_data_creator=ath.db_data_creator,
annotation_count=3,
include_user_annotations=True
)
url_mapping = setup_info.url_mapping
Expand Down Expand Up @@ -47,16 +46,13 @@ async def test_review_next_source(api_test_helper):
annotation_info = result.annotations
relevant_info = annotation_info.relevant
assert relevant_info.auto == True
assert relevant_info.users.relevant == 3
assert relevant_info.users.not_relevant == 1

record_type_info = annotation_info.record_type
assert record_type_info.auto == RecordType.ARREST_RECORDS
user_d = record_type_info.users
assert user_d[RecordType.ARREST_RECORDS] == 3
assert user_d[RecordType.DISPATCH_RECORDINGS] == 2
assert user_d[RecordType.ACCIDENT_REPORTS] == 1
assert list(user_d.keys()) == [RecordType.ARREST_RECORDS, RecordType.DISPATCH_RECORDINGS, RecordType.ACCIDENT_REPORTS]
assert list(user_d.keys()) == [RecordType.ACCIDENT_REPORTS]


agency_info = annotation_info.agency
Expand All @@ -67,9 +63,7 @@ async def test_review_next_source(api_test_helper):
# Check user agency suggestions exist and in descending order of count
user_agency_suggestions = agency_info.users
user_agency_suggestions_as_list = list(user_agency_suggestions.values())
assert len(user_agency_suggestions_as_list) == 3
for i in range(3):
assert user_agency_suggestions_as_list[i].count == 3 - i
assert len(user_agency_suggestions_as_list) == 1

# Check confirmed agencies exist
confirmed_agencies = agency_info.confirmed
Expand Down
Loading