From 1b0e6c372d8b9d6f83b3cbb35439ea4d1f728440 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 8 Apr 2025 21:18:48 -0400 Subject: [PATCH 1/4] feat(app): allow retrieving URLs for annotation without html info --- collector_db/AsyncDatabaseClient.py | 1 - tests/conftest.py | 21 +++- .../AlembicRunner.py | 0 tests/helpers/complex_test_data_functions.py | 5 +- tests/test_alembic/conftest.py | 2 +- tests/test_alembic/helpers.py | 2 +- .../integration/api/test_annotate.py | 98 ++++++++++++++++++- 7 files changed, 120 insertions(+), 9 deletions(-) rename tests/{test_alembic => helpers}/AlembicRunner.py (100%) diff --git a/collector_db/AsyncDatabaseClient.py b/collector_db/AsyncDatabaseClient.py index 34ebe7f7..39dba50e 100644 --- a/collector_db/AsyncDatabaseClient.py +++ b/collector_db/AsyncDatabaseClient.py @@ -129,7 +129,6 @@ async def get_next_url_for_user_annotation( URL, ) .where(URL.outcome == URLStatus.PENDING.value) - .where(exists(select(URLHTMLContent).where(URLHTMLContent.url_id == URL.id))) # URL must not have metadata annotation by this user .where( not_( diff --git a/tests/conftest.py b/tests/conftest.py index 6181dd50..3e33d57a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,13 @@ import pytest from alembic import command from alembic.config import Config -from sqlalchemy import create_engine +from sqlalchemy import create_engine, inspect, MetaData +from sqlalchemy.orm import scoped_session, sessionmaker from collector_db.DatabaseClient import DatabaseClient from collector_db.helper_functions import get_postgres_connection_string from collector_db.models import Base +from helpers.AlembicRunner import AlembicRunner from tests.helpers.DBDataCreator import DBDataCreator @@ -19,7 +21,22 @@ def setup_and_teardown(): "sqlalchemy.url", get_postgres_connection_string() ) - command.upgrade(alembic_cfg, "head") + live_connection = engine.connect() + runner = AlembicRunner( + alembic_config=alembic_cfg, + inspector=inspect(live_connection), + metadata=MetaData(), + connection=live_connection, + session=scoped_session(sessionmaker(bind=live_connection)), + ) + try: + runner.upgrade("head") + except Exception as e: + runner.reset_schema() + runner.stamp("base") + runner.upgrade("head") + + live_connection.close() engine.dispose() yield diff --git a/tests/test_alembic/AlembicRunner.py b/tests/helpers/AlembicRunner.py similarity index 100% rename from tests/test_alembic/AlembicRunner.py rename to tests/helpers/AlembicRunner.py diff --git a/tests/helpers/complex_test_data_functions.py b/tests/helpers/complex_test_data_functions.py index 57fd6b96..18d3f92a 100644 --- a/tests/helpers/complex_test_data_functions.py +++ b/tests/helpers/complex_test_data_functions.py @@ -36,11 +36,12 @@ class AnnotateAgencySetupInfo(BaseModel): async def setup_for_annotate_agency( db_data_creator: DBDataCreator, url_count: int, - suggestion_type: SuggestionType = SuggestionType.UNKNOWN + suggestion_type: SuggestionType = SuggestionType.UNKNOWN, + with_html_content: bool = True ): buci: BatchURLCreationInfo = await db_data_creator.batch_and_urls( url_count=url_count, - with_html_content=True + with_html_content=with_html_content ) await db_data_creator.auto_suggestions( url_ids=buci.url_ids, diff --git a/tests/test_alembic/conftest.py b/tests/test_alembic/conftest.py index 11b75b92..ff0591d1 100644 --- a/tests/test_alembic/conftest.py +++ b/tests/test_alembic/conftest.py @@ -4,7 +4,7 @@ from sqlalchemy.orm import scoped_session, sessionmaker from collector_db.helper_functions import get_postgres_connection_string -from tests.test_alembic.AlembicRunner import AlembicRunner +from helpers.AlembicRunner import AlembicRunner @pytest.fixture() diff --git a/tests/test_alembic/helpers.py b/tests/test_alembic/helpers.py index d6b2bea4..32d67321 100644 --- a/tests/test_alembic/helpers.py +++ b/tests/test_alembic/helpers.py @@ -3,7 +3,7 @@ from sqlalchemy import text from sqlalchemy.orm import Session -from tests.test_alembic.AlembicRunner import AlembicRunner +from helpers.AlembicRunner import AlembicRunner def get_enum_values(enum_name: str, session: Session) -> list[str]: diff --git a/tests/test_automated/integration/api/test_annotate.py b/tests/test_automated/integration/api/test_annotate.py index 3d870371..0e462ba5 100644 --- a/tests/test_automated/integration/api/test_annotate.py +++ b/tests/test_automated/integration/api/test_annotate.py @@ -28,8 +28,12 @@ def check_url_mappings_match( def check_html_info_not_empty( html_info: ResponseHTMLInfo ): - assert html_info.description != "" - assert html_info.title != "" + assert not html_info_empty(html_info) + +def html_info_empty( + html_info: ResponseHTMLInfo +) -> bool: + return html_info.description == "" and html_info.title == "" @pytest.mark.asyncio async def test_annotate_relevancy(api_test_helper): @@ -123,6 +127,36 @@ async def test_annotate_relevancy(api_test_helper): assert results[0].relevant is True +@pytest.mark.asyncio +async def test_annotate_relevancy_no_html(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add `Relevancy` attribute with value `True` to 1st URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_1.url_id, + relevant=True + ) + + # Add 'Relevancy' attribute with value `False` to 2nd URL + await ath.db_data_creator.auto_relevant_suggestions( + url_id=url_2.url_id, + relevant=False + ) + + # Call `GET` `/annotate/relevance` and receive next URL + request_info_1: GetNextRelevanceAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_relevance_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + assert html_info_empty(inner_info_1.html_info) @pytest.mark.asyncio async def test_annotate_record_type(api_test_helper): @@ -213,6 +247,36 @@ async def test_annotate_record_type(api_test_helper): if result.url_id == inner_info_1.url_info.url_id: assert result.record_type == RecordType.BOOKING_REPORTS.value +@pytest.mark.asyncio +async def test_annotate_record_type_no_html_info(api_test_helper): + ath = api_test_helper + + batch_id = ath.db_data_creator.batch() + + # Create 2 URLs with outcome `pending` + iui: InsertURLsInfo = ath.db_data_creator.urls(batch_id=batch_id, url_count=2) + + url_1 = iui.url_mappings[0] + url_2 = iui.url_mappings[1] + + # Add record type attribute with value `Accident Reports` to 1st URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_1.url_id, + record_type=RecordType.ACCIDENT_REPORTS + ) + + # Add 'Record Type' attribute with value `Dispatch Recordings` to 2nd URL + await ath.db_data_creator.auto_record_type_suggestions( + url_id=url_2.url_id, + record_type=RecordType.DISPATCH_RECORDINGS + ) + + # Call `GET` `/annotate/record-type` and receive next URL + request_info_1: GetNextRecordTypeAnnotationResponseOuterInfo = api_test_helper.request_validator.get_next_record_type_annotation() + inner_info_1 = request_info_1.next_annotation + + check_url_mappings_match(inner_info_1.url_info, url_1) + assert html_info_empty(inner_info_1.html_info) @pytest.mark.asyncio async def test_annotate_agency_multiple_auto_suggestions(api_test_helper): @@ -256,6 +320,36 @@ async def test_annotate_agency_multiple_auto_suggestions(api_test_helper): assert agency_suggestion.locality is not None +@pytest.mark.asyncio +async def test_annotate_agency_multiple_auto_suggestions_no_html(api_test_helper): + """ + Test Scenario: Multiple Auto Suggestions + A URL has multiple Agency Auto Suggestion and has not been annotated by the User + The user should receive all of the auto suggestions with full detail + """ + ath = api_test_helper + buci: BatchURLCreationInfo = await ath.db_data_creator.batch_and_urls( + url_count=1, + with_html_content=False + ) + await ath.db_data_creator.auto_suggestions( + url_ids=buci.url_ids, + num_suggestions=2, + suggestion_type=SuggestionType.AUTO_SUGGESTION + ) + + # User requests next annotation + response = await ath.request_validator.get_next_agency_annotation() + + assert response.next_annotation + next_annotation = response.next_annotation + # Check that url_id matches the one we inserted + assert next_annotation.url_id == buci.url_ids[0] + + # Check that html data is not present + assert next_annotation.html_info.description == "" + assert next_annotation.html_info.title == "" + @pytest.mark.asyncio async def test_annotate_agency_single_unknown_auto_suggestion(api_test_helper): """ From 27581eb9abd422cdd5effdf5e2e98367b75c50a9 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 8 Apr 2025 21:23:27 -0400 Subject: [PATCH 2/4] Fix import bug --- tests/test_alembic/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_alembic/conftest.py b/tests/test_alembic/conftest.py index ff0591d1..8cd1d0ab 100644 --- a/tests/test_alembic/conftest.py +++ b/tests/test_alembic/conftest.py @@ -4,7 +4,7 @@ from sqlalchemy.orm import scoped_session, sessionmaker from collector_db.helper_functions import get_postgres_connection_string -from helpers.AlembicRunner import AlembicRunner +from tests.helpers.AlembicRunner import AlembicRunner @pytest.fixture() From 0ba8dc14ff2907509fdb0a0c0b6e3e82a1fe74d8 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 8 Apr 2025 21:26:03 -0400 Subject: [PATCH 3/4] Fix import bug --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 3e33d57a..7cc4291c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ from collector_db.DatabaseClient import DatabaseClient from collector_db.helper_functions import get_postgres_connection_string from collector_db.models import Base -from helpers.AlembicRunner import AlembicRunner +from tests.helpers.AlembicRunner import AlembicRunner from tests.helpers.DBDataCreator import DBDataCreator From 3275fe38af971bf555d502da09d20f9f3588dffe Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 8 Apr 2025 21:34:21 -0400 Subject: [PATCH 4/4] Fix import bug --- tests/test_alembic/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_alembic/helpers.py b/tests/test_alembic/helpers.py index 32d67321..dfebce07 100644 --- a/tests/test_alembic/helpers.py +++ b/tests/test_alembic/helpers.py @@ -3,7 +3,7 @@ from sqlalchemy import text from sqlalchemy.orm import Session -from helpers.AlembicRunner import AlembicRunner +from tests.helpers.AlembicRunner import AlembicRunner def get_enum_values(enum_name: str, session: Session) -> list[str]: