From 6858bc0000f4d454b9c8ea228ff81ff76a8a5811 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 17 Apr 2025 09:21:23 -0400 Subject: [PATCH] feat(app): change batch status `completed` to `ready to label` --- ...hange_batch_completed_to_ready_to_label.py | 36 +++++++++++++++++++ collector_db/StatementComposer.py | 2 +- collector_db/models.py | 2 +- collector_manager/AsyncCollectorBase.py | 2 +- core/TaskManager.py | 2 +- core/enums.py | 2 +- .../lifecycle/test_auto_googler_lifecycle.py | 2 +- .../core/lifecycle/test_ckan_lifecycle.py | 2 +- .../test_common_crawler_lifecycle.py | 2 +- .../lifecycle/test_muckrock_lifecycles.py | 6 ++-- .../integration/api/test_example_collector.py | 6 ++-- .../integration/core/test_async_core.py | 4 +-- .../core/test_example_collector_lifecycle.py | 6 ++-- util/alembic_helpers.py | 13 ++++++- 14 files changed, 67 insertions(+), 20 deletions(-) create mode 100644 alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py diff --git a/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py b/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py new file mode 100644 index 00000000..882c2c5f --- /dev/null +++ b/alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py @@ -0,0 +1,36 @@ +"""Change batch completed to ready to label + +Revision ID: e285e6e7cf71 +Revises: 997f5bf53772 +Create Date: 2025-04-17 09:09:38.137131 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from util.alembic_helpers import switch_enum_type, alter_enum_value + +# revision identifiers, used by Alembic. +revision: str = 'e285e6e7cf71' +down_revision: Union[str, None] = '997f5bf53772' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + alter_enum_value( + enum_name="batch_status", + old_value="complete", + new_value="ready to label" + ) + + + +def downgrade() -> None: + alter_enum_value( + enum_name="batch_status", + old_value="ready to label", + new_value="complete" + ) diff --git a/collector_db/StatementComposer.py b/collector_db/StatementComposer.py index d108a3fa..e25ba5d4 100644 --- a/collector_db/StatementComposer.py +++ b/collector_db/StatementComposer.py @@ -22,7 +22,7 @@ def pending_urls_without_html_data() -> Select: join(Task, LinkTaskURL.task_id == Task.id). where(LinkTaskURL.url_id == URL.id). where(Task.task_type == TaskType.HTML.value). - where(Task.task_status == BatchStatus.COMPLETE.value) + where(Task.task_status == BatchStatus.READY_TO_LABEL.value) ) query = ( select(URL). diff --git a/collector_db/models.py b/collector_db/models.py index 4ac117d6..42b113c6 100644 --- a/collector_db/models.py +++ b/collector_db/models.py @@ -17,7 +17,7 @@ CURRENT_TIME_SERVER_DEFAULT = func.now() -batch_status_enum = PGEnum('complete', 'error', 'in-process', 'aborted', name='batch_status') +batch_status_enum = PGEnum('ready to label', 'error', 'in-process', 'aborted', name='batch_status') record_type_values = get_enum_values(RecordType) diff --git a/collector_manager/AsyncCollectorBase.py b/collector_manager/AsyncCollectorBase.py index fe260266..099f5338 100644 --- a/collector_manager/AsyncCollectorBase.py +++ b/collector_manager/AsyncCollectorBase.py @@ -131,4 +131,4 @@ async def log( )) async def close(self) -> None: - self.status = BatchStatus.COMPLETE + self.status = BatchStatus.READY_TO_LABEL diff --git a/core/TaskManager.py b/core/TaskManager.py index 8a40b129..429375c2 100644 --- a/core/TaskManager.py +++ b/core/TaskManager.py @@ -159,7 +159,7 @@ async def handle_outcome(self, run_info: TaskOperatorRunInfo): case TaskOperatorOutcome.SUCCESS: await self.adb_client.update_task_status( task_id=run_info.task_id, - status=BatchStatus.COMPLETE + status=BatchStatus.READY_TO_LABEL ) async def handle_task_error(self, run_info: TaskOperatorRunInfo): diff --git a/core/enums.py b/core/enums.py index cfccbb92..714b1d03 100644 --- a/core/enums.py +++ b/core/enums.py @@ -2,7 +2,7 @@ class BatchStatus(Enum): - COMPLETE = "complete" + READY_TO_LABEL = "ready to label" IN_PROCESS = "in-process" ERROR = "error" ABORTED = "aborted" diff --git a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py index f2b2c098..9e5c0e49 100644 --- a/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py +++ b/tests/manual/core/lifecycle/test_auto_googler_lifecycle.py @@ -32,7 +32,7 @@ def test_auto_googler_collector_lifecycle(test_core): batch_info: BatchInfo = api.dependencies.db_client.get_batch_by_id(1) assert batch_info.strategy == "auto_googler" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count == 20 url_infos = db_client.get_urls_by_batch(1) diff --git a/tests/manual/core/lifecycle/test_ckan_lifecycle.py b/tests/manual/core/lifecycle/test_ckan_lifecycle.py index 575dedfa..4e87bbbd 100644 --- a/tests/manual/core/lifecycle/test_ckan_lifecycle.py +++ b/tests/manual/core/lifecycle/test_ckan_lifecycle.py @@ -24,7 +24,7 @@ def test_ckan_lifecycle(test_core): batch_info: BatchInfo = db_client.get_batch_by_id(1) assert batch_info.strategy == "ckan" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count >= 3000 url_infos = db_client.get_urls_by_batch(1) diff --git a/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py b/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py index d2ee4495..03fe5855 100644 --- a/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py +++ b/tests/manual/core/lifecycle/test_common_crawler_lifecycle.py @@ -34,7 +34,7 @@ def test_common_crawler_lifecycle(test_core: SourceCollectorCore): batch_info = db_client.get_batch_by_id(1) assert batch_info.strategy == "common_crawler" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.parameters == config url_infos = db_client.get_urls_by_batch(1) diff --git a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py index b688b0a8..72d2d9fc 100644 --- a/tests/manual/core/lifecycle/test_muckrock_lifecycles.py +++ b/tests/manual/core/lifecycle/test_muckrock_lifecycles.py @@ -23,7 +23,7 @@ def test_muckrock_simple_search_collector_lifecycle(test_core): batch_info: BatchInfo = db_client.get_batch_by_id(1) assert batch_info.strategy == "muckrock_simple_search" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count >= 10 url_infos = db_client.get_urls_by_batch(1) @@ -45,7 +45,7 @@ def test_muckrock_county_level_search_collector_lifecycle(test_core): batch_info: BatchInfo = db_client.get_batch_by_id(1) assert batch_info.strategy == "muckrock_county_search" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count >= 10 url_infos = db_client.get_urls_by_batch(1) @@ -67,7 +67,7 @@ def test_muckrock_full_search_collector_lifecycle(test_core): batch_info: BatchInfo = db_client.get_batch_by_id(1) assert batch_info.strategy == CollectorType.MUCKROCK_ALL_SEARCH.value - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count >= 1 url_infos = db_client.get_urls_by_batch(1) diff --git a/tests/test_automated/integration/api/test_example_collector.py b/tests/test_automated/integration/api/test_example_collector.py index d1466c8c..b13f7e31 100644 --- a/tests/test_automated/integration/api/test_example_collector.py +++ b/tests/test_automated/integration/api/test_example_collector.py @@ -54,7 +54,7 @@ async def test_example_collector(api_test_helper): csr: GetBatchStatusResponse = ath.request_validator.get_batch_statuses( collector_type=CollectorType.EXAMPLE, - status=BatchStatus.COMPLETE + status=BatchStatus.READY_TO_LABEL ) assert len(csr.results) == 1 @@ -62,10 +62,10 @@ async def test_example_collector(api_test_helper): assert bsi.id == batch_id assert bsi.strategy == CollectorType.EXAMPLE.value - assert bsi.status == BatchStatus.COMPLETE + assert bsi.status == BatchStatus.READY_TO_LABEL bi: BatchInfo = ath.request_validator.get_batch_info(batch_id=batch_id) - assert bi.status == BatchStatus.COMPLETE + assert bi.status == BatchStatus.READY_TO_LABEL assert bi.total_url_count == 2 assert bi.parameters == dto.model_dump() assert bi.strategy == CollectorType.EXAMPLE.value diff --git a/tests/test_automated/integration/core/test_async_core.py b/tests/test_automated/integration/core/test_async_core.py index ed314dfd..f2125865 100644 --- a/tests/test_automated/integration/core/test_async_core.py +++ b/tests/test_automated/integration/core/test_async_core.py @@ -44,7 +44,7 @@ async def test_conclude_task_success(db_data_creator: DBDataCreator): task_info = await ddc.adb_client.get_task_info(task_id=task_id) - assert task_info.task_status == BatchStatus.COMPLETE + assert task_info.task_status == BatchStatus.READY_TO_LABEL assert len(task_info.urls) == 3 @pytest.mark.asyncio @@ -65,7 +65,7 @@ async def test_conclude_task_success(db_data_creator: DBDataCreator): task_info = await ddc.adb_client.get_task_info(task_id=task_id) - assert task_info.task_status == BatchStatus.COMPLETE + assert task_info.task_status == BatchStatus.READY_TO_LABEL assert len(task_info.urls) == 3 @pytest.mark.asyncio diff --git a/tests/test_automated/integration/core/test_example_collector_lifecycle.py b/tests/test_automated/integration/core/test_example_collector_lifecycle.py index d3f3f855..a9c4900f 100644 --- a/tests/test_automated/integration/core/test_example_collector_lifecycle.py +++ b/tests/test_automated/integration/core/test_example_collector_lifecycle.py @@ -41,11 +41,11 @@ async def test_example_collector_lifecycle( await asyncio.sleep(1.5) await acore.collector_manager.logger.flush_all() print("Done sleeping...") - assert core.get_status(batch_id) == BatchStatus.COMPLETE + assert core.get_status(batch_id) == BatchStatus.READY_TO_LABEL batch_info: BatchInfo = db_client.get_batch_by_id(batch_id) assert batch_info.strategy == "example" - assert batch_info.status == BatchStatus.COMPLETE + assert batch_info.status == BatchStatus.READY_TO_LABEL assert batch_info.total_url_count == 2 assert batch_info.parameters == dto.model_dump() assert batch_info.compute_time > 1 @@ -90,4 +90,4 @@ async def test_example_collector_lifecycle_multiple_batches( await asyncio.sleep(3) for csi in csis: - assert core.get_status(csi.batch_id) == BatchStatus.COMPLETE + assert core.get_status(csi.batch_id) == BatchStatus.READY_TO_LABEL diff --git a/util/alembic_helpers.py b/util/alembic_helpers.py index d2120634..84cdbfa7 100644 --- a/util/alembic_helpers.py +++ b/util/alembic_helpers.py @@ -6,7 +6,8 @@ def switch_enum_type( column_name, enum_name, new_enum_values, - drop_old_enum=True + drop_old_enum=True, + cast_dict: dict = None ): """ Switches an ENUM type in a PostgreSQL column by: @@ -36,3 +37,13 @@ def switch_enum_type( # Drop the old enum type if drop_old_enum: op.execute(f'DROP TYPE "{old_enum_temp_name}"') + +def alter_enum_value( + enum_name, + old_value, + new_value +): + """ + Changes one value of an enum type + """ + op.execute(f"ALTER TYPE {enum_name} RENAME VALUE '{old_value}' TO '{new_value}'") \ No newline at end of file