Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Change batch completed to ready to label

Revision ID: e285e6e7cf71
Revises: 997f5bf53772
Create Date: 2025-04-17 09:09:38.137131

"""
from typing import Sequence, Union

from alembic import op

Check warning on line 10 in alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py#L10 <401>

'alembic.op' imported but unused
Raw output
./alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py:10:1: F401 'alembic.op' imported but unused
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py:11:1: F401 'sqlalchemy as sa' imported but unused

from util.alembic_helpers import switch_enum_type, alter_enum_value

Check warning on line 13 in alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py#L13 <401>

'util.alembic_helpers.switch_enum_type' imported but unused
Raw output
./alembic/versions/2025_04_17_0909-e285e6e7cf71_change_batch_completed_to_ready_to_label.py:13:1: F401 'util.alembic_helpers.switch_enum_type' imported but unused

# revision identifiers, used by Alembic.
revision: str = 'e285e6e7cf71'
down_revision: Union[str, None] = '997f5bf53772'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Rename the ``batch_status`` enum value 'complete' to 'ready to label'."""
    alter_enum_value(
        enum_name="batch_status",
        old_value="complete",
        new_value="ready to label"
    )



def downgrade() -> None:
    """Rename the ``batch_status`` enum value 'ready to label' back to 'complete'."""
    alter_enum_value(
        enum_name="batch_status",
        old_value="ready to label",
        new_value="complete"
    )
2 changes: 1 addition & 1 deletion collector_db/StatementComposer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def pending_urls_without_html_data() -> Select:
join(Task, LinkTaskURL.task_id == Task.id).
where(LinkTaskURL.url_id == URL.id).
where(Task.task_type == TaskType.HTML.value).
where(Task.task_status == BatchStatus.COMPLETE.value)
where(Task.task_status == BatchStatus.READY_TO_LABEL.value)
)
query = (
select(URL).
Expand Down
2 changes: 1 addition & 1 deletion collector_db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

CURRENT_TIME_SERVER_DEFAULT = func.now()

batch_status_enum = PGEnum('complete', 'error', 'in-process', 'aborted', name='batch_status')
batch_status_enum = PGEnum('ready to label', 'error', 'in-process', 'aborted', name='batch_status')

record_type_values = get_enum_values(RecordType)

Expand Down
2 changes: 1 addition & 1 deletion collector_manager/AsyncCollectorBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,4 @@ async def log(
))

async def close(self) -> None:
self.status = BatchStatus.COMPLETE
self.status = BatchStatus.READY_TO_LABEL
2 changes: 1 addition & 1 deletion core/TaskManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ async def handle_outcome(self, run_info: TaskOperatorRunInfo):
case TaskOperatorOutcome.SUCCESS:
await self.adb_client.update_task_status(
task_id=run_info.task_id,
status=BatchStatus.COMPLETE
status=BatchStatus.READY_TO_LABEL
)

async def handle_task_error(self, run_info: TaskOperatorRunInfo):
Expand Down
2 changes: 1 addition & 1 deletion core/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


class BatchStatus(Enum):
COMPLETE = "complete"
READY_TO_LABEL = "ready to label"
IN_PROCESS = "in-process"
ERROR = "error"
ABORTED = "aborted"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_auto_googler_collector_lifecycle(test_core):

batch_info: BatchInfo = api.dependencies.db_client.get_batch_by_id(1)
assert batch_info.strategy == "auto_googler"
assert batch_info.status == BatchStatus.COMPLETE
assert batch_info.status == BatchStatus.READY_TO_LABEL
assert batch_info.total_url_count == 20

url_infos = db_client.get_urls_by_batch(1)
Expand Down
2 changes: 1 addition & 1 deletion tests/manual/core/lifecycle/test_ckan_lifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_ckan_lifecycle(test_core):

batch_info: BatchInfo = db_client.get_batch_by_id(1)
assert batch_info.strategy == "ckan"
assert batch_info.status == BatchStatus.COMPLETE
assert batch_info.status == BatchStatus.READY_TO_LABEL
assert batch_info.total_url_count >= 3000

url_infos = db_client.get_urls_by_batch(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_common_crawler_lifecycle(test_core: SourceCollectorCore):

batch_info = db_client.get_batch_by_id(1)
assert batch_info.strategy == "common_crawler"
assert batch_info.status == BatchStatus.COMPLETE
assert batch_info.status == BatchStatus.READY_TO_LABEL
assert batch_info.parameters == config

url_infos = db_client.get_urls_by_batch(1)
Expand Down
6 changes: 3 additions & 3 deletions tests/manual/core/lifecycle/test_muckrock_lifecycles.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_muckrock_simple_search_collector_lifecycle(test_core):

batch_info: BatchInfo = db_client.get_batch_by_id(1)
assert batch_info.strategy == "muckrock_simple_search"
assert batch_info.status == BatchStatus.COMPLETE
assert batch_info.status == BatchStatus.READY_TO_LABEL
assert batch_info.total_url_count >= 10

url_infos = db_client.get_urls_by_batch(1)
Expand All @@ -45,7 +45,7 @@ def test_muckrock_county_level_search_collector_lifecycle(test_core):

batch_info: BatchInfo = db_client.get_batch_by_id(1)
assert batch_info.strategy == "muckrock_county_search"
assert batch_info.status == BatchStatus.COMPLETE
assert batch_info.status == BatchStatus.READY_TO_LABEL
assert batch_info.total_url_count >= 10

url_infos = db_client.get_urls_by_batch(1)
Expand All @@ -67,7 +67,7 @@ def test_muckrock_full_search_collector_lifecycle(test_core):

batch_info: BatchInfo = db_client.get_batch_by_id(1)
assert batch_info.strategy == CollectorType.MUCKROCK_ALL_SEARCH.value
assert batch_info.status == BatchStatus.COMPLETE
assert batch_info.status == BatchStatus.READY_TO_LABEL
assert batch_info.total_url_count >= 1

url_infos = db_client.get_urls_by_batch(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,18 @@ async def test_example_collector(api_test_helper):

csr: GetBatchStatusResponse = ath.request_validator.get_batch_statuses(
collector_type=CollectorType.EXAMPLE,
status=BatchStatus.COMPLETE
status=BatchStatus.READY_TO_LABEL
)

assert len(csr.results) == 1
bsi: BatchStatusInfo = csr.results[0]

assert bsi.id == batch_id
assert bsi.strategy == CollectorType.EXAMPLE.value
assert bsi.status == BatchStatus.COMPLETE
assert bsi.status == BatchStatus.READY_TO_LABEL

bi: BatchInfo = ath.request_validator.get_batch_info(batch_id=batch_id)
assert bi.status == BatchStatus.COMPLETE
assert bi.status == BatchStatus.READY_TO_LABEL
assert bi.total_url_count == 2
assert bi.parameters == dto.model_dump()
assert bi.strategy == CollectorType.EXAMPLE.value
Expand Down
4 changes: 2 additions & 2 deletions tests/test_automated/integration/core/test_async_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ async def test_conclude_task_success(db_data_creator: DBDataCreator):

task_info = await ddc.adb_client.get_task_info(task_id=task_id)

assert task_info.task_status == BatchStatus.COMPLETE
assert task_info.task_status == BatchStatus.READY_TO_LABEL
assert len(task_info.urls) == 3

@pytest.mark.asyncio
Expand All @@ -65,7 +65,7 @@ async def test_conclude_task_success(db_data_creator: DBDataCreator):

task_info = await ddc.adb_client.get_task_info(task_id=task_id)

assert task_info.task_status == BatchStatus.COMPLETE
assert task_info.task_status == BatchStatus.READY_TO_LABEL
assert len(task_info.urls) == 3

@pytest.mark.asyncio
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ async def test_example_collector_lifecycle(
await asyncio.sleep(1.5)
await acore.collector_manager.logger.flush_all()
print("Done sleeping...")
assert core.get_status(batch_id) == BatchStatus.COMPLETE
assert core.get_status(batch_id) == BatchStatus.READY_TO_LABEL

batch_info: BatchInfo = db_client.get_batch_by_id(batch_id)
assert batch_info.strategy == "example"
assert batch_info.status == BatchStatus.COMPLETE
assert batch_info.status == BatchStatus.READY_TO_LABEL
assert batch_info.total_url_count == 2
assert batch_info.parameters == dto.model_dump()
assert batch_info.compute_time > 1
Expand Down Expand Up @@ -90,4 +90,4 @@ async def test_example_collector_lifecycle_multiple_batches(
await asyncio.sleep(3)

for csi in csis:
assert core.get_status(csi.batch_id) == BatchStatus.COMPLETE
assert core.get_status(csi.batch_id) == BatchStatus.READY_TO_LABEL
13 changes: 12 additions & 1 deletion util/alembic_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
column_name,
enum_name,
new_enum_values,
drop_old_enum=True
drop_old_enum=True,
cast_dict: dict = None

Check warning on line 10 in util/alembic_helpers.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] util/alembic_helpers.py#L10 <100>

Unused argument 'cast_dict'
Raw output
./util/alembic_helpers.py:10:9: U100 Unused argument 'cast_dict'
):
"""
Switches an ENUM type in a PostgreSQL column by:
Expand Down Expand Up @@ -36,3 +37,13 @@
# Drop the old enum type
if drop_old_enum:
op.execute(f'DROP TYPE "{old_enum_temp_name}"')

def alter_enum_value(
        enum_name: str,
        old_value: str,
        new_value: str
) -> None:
    """
    Changes one value of an enum type by executing
    ``ALTER TYPE <enum_name> RENAME VALUE '<old_value>' TO '<new_value>'``
    through ``op.execute``.

    :param enum_name: Name of the enum type. Interpolated into the statement
        as-is (not quoted), so it must already be a valid SQL identifier.
    :param old_value: Existing enum label to rename.
    :param new_value: Label that replaces ``old_value``.
    """
    def _sql_literal(value: str) -> str:
        # Double any embedded single quote so a label such as "won't fix"
        # remains a single, well-formed SQL string literal.
        return "'" + value.replace("'", "''") + "'"

    op.execute(
        f"ALTER TYPE {enum_name} RENAME VALUE "
        f"{_sql_literal(old_value)} TO {_sql_literal(new_value)}"
    )

Check warning on line 49 in util/alembic_helpers.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] util/alembic_helpers.py#L49 <292>

no newline at end of file
Raw output
./util/alembic_helpers.py:49:86: W292 no newline at end of file