Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions ENV.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,17 @@ Note that some tasks/subtasks are themselves enabled by other tasks.

### Scheduled Task Flags

| Flag | Description |
|-------------------------------------|--------------------------------------------------------------------|
| `SCHEDULED_TASKS_FLAG` | All scheduled tasks. Disabling disables all other scheduled tasks. |
| `PUSH_TO_HUGGING_FACE_TASK_FLAG` | Pushes data to HuggingFace. |
| `POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG` | Populates the backlog snapshot. |
| `DELETE_OLD_LOGS_TASK_FLAG` | Deletes old logs. |
| `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. |
| `IA_PROBE_TASK_FLAG` | Extracts and links Internet Archives metadata to URLs. |
| `IA_SAVE_TASK_FLAG` | Saves URLs to Internet Archives. |
| Flag | Description |
|-------------------------------------|-------------------------------------------------------------------------------|
| `SCHEDULED_TASKS_FLAG` | All scheduled tasks. Disabling disables all other scheduled tasks. |
| `PUSH_TO_HUGGING_FACE_TASK_FLAG` | Pushes data to HuggingFace. |
| `POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG` | Populates the backlog snapshot. |
| `DELETE_OLD_LOGS_TASK_FLAG` | Deletes old logs. |
| `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. |
| `IA_PROBE_TASK_FLAG` | Extracts and links Internet Archives metadata to URLs. |
| `IA_SAVE_TASK_FLAG` | Saves URLs to Internet Archives. |
| `MARK_TASK_NEVER_COMPLETED_TASK_FLAG` | Marks tasks that were started but never completed (usually due to a restart). |
| `DELETE_STALE_SCREENSHOTS_TASK_FLAG` | Deletes stale screenshots for URLs already validated. |

### URL Task Flags

Expand All @@ -87,6 +89,7 @@ URL Task Flags are collectively controlled by the `RUN_URL_TASKS_TASK_FLAG` flag
| `URL_AUTO_VALIDATE_TASK_FLAG` | Automatically validates URLs. |
| `URL_AUTO_NAME_TASK_FLAG` | Automatically names URLs. |
| `URL_SUSPEND_TASK_FLAG` | Suspends URLs meeting suspension criteria. |
| `URL_SUBMIT_META_URLS_TASK_FLAG` | Submits meta URLs to the Data Sources App. |

### Agency ID Subtasks

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from alembic import op

from src.util.alembic_helpers import url_id_column, user_id_column, created_at_column

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Add logic for meta URL submissions

Revision ID: 241fd3925f5d
Revises: 84a3de626ad8
Create Date: 2025-09-30 16:13:03.980113

"""
from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op

from src.util.alembic_helpers import url_id_column, created_at_column, agency_id_column

# revision identifiers, used by Alembic.
revision: str = '241fd3925f5d'
down_revision: Union[str, None] = '84a3de626ad8'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:

Check warning on line 22 in alembic/versions/2025_09_30_1613-241fd3925f5d_add_logic_for_meta_url_submissions.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_09_30_1613-241fd3925f5d_add_logic_for_meta_url_submissions.py#L22 <103>

Missing docstring in public function
Raw output
./alembic/versions/2025_09_30_1613-241fd3925f5d_add_logic_for_meta_url_submissions.py:22:1: D103 Missing docstring in public function
op.execute("""ALTER TYPE task_type ADD VALUE 'Submit Meta URLs'""")
op.create_table(
"url_ds_meta_url",
url_id_column(),
agency_id_column(),
sa.Column("ds_meta_url_id", sa.Integer(), nullable=False),
created_at_column(),
sa.PrimaryKeyConstraint(
"url_id",
"agency_id"
),
sa.UniqueConstraint(
"ds_meta_url_id"
)
)
op.execute("""ALTER TYPE task_type ADD VALUE 'Delete Stale Screenshots'""")
op.execute("""ALTER TYPE task_type ADD VALUE 'Mark Task Never Completed'""")
op.execute("""
CREATE TYPE task_status_enum as ENUM(
'complete',
'in-process',
'error',
'aborted',
'never-completed'
)
""")
op.execute("""
ALTER TABLE tasks
ALTER COLUMN task_status DROP DEFAULT,
ALTER COLUMN task_status TYPE task_status_enum
USING (
CASE task_status::text -- old enum -> text
WHEN 'ready to label' THEN 'complete'::task_status_enum
ELSE task_status::text::task_status_enum
END
);
""")


def downgrade() -> None:

Check warning on line 62 in alembic/versions/2025_09_30_1613-241fd3925f5d_add_logic_for_meta_url_submissions.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_09_30_1613-241fd3925f5d_add_logic_for_meta_url_submissions.py#L62 <103>

Missing docstring in public function
Raw output
./alembic/versions/2025_09_30_1613-241fd3925f5d_add_logic_for_meta_url_submissions.py:62:1: D103 Missing docstring in public function
pass
3 changes: 2 additions & 1 deletion src/api/endpoints/task/by_id/dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pydantic import BaseModel

from src.db.models.impl.task.enums import TaskStatus
from src.db.models.impl.url.core.pydantic.info import URLInfo
from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic
from src.db.enums import TaskType
Expand All @@ -11,7 +12,7 @@

class TaskInfo(BaseModel):
task_type: TaskType
task_status: BatchStatus
task_status: TaskStatus
updated_at: datetime.datetime
error_info: str | None = None
urls: list[URLInfo]
Expand Down
3 changes: 2 additions & 1 deletion src/api/endpoints/task/by_id/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from src.api.endpoints.task.by_id.dto import TaskInfo
from src.collectors.enums import URLStatus
from src.core.enums import BatchStatus
from src.db.models.impl.task.enums import TaskStatus
from src.db.models.impl.url.core.pydantic.info import URLInfo
from src.db.models.impl.url.error_info.pydantic import URLErrorInfoPydantic
from src.db.enums import TaskType
Expand Down Expand Up @@ -59,7 +60,7 @@ async def run(self, session: AsyncSession) -> TaskInfo:
errored_urls.append(url_error_info)
return TaskInfo(
task_type=TaskType(task.task_type),
task_status=BatchStatus(task.task_status),
task_status=TaskStatus(task.task_status),
error_info=error,
updated_at=task.updated_at,
urls=url_infos,
Expand Down
3 changes: 2 additions & 1 deletion src/core/tasks/base/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from src.core.tasks.url.enums import TaskOperatorOutcome
from src.db.client.async_ import AsyncDatabaseClient
from src.db.enums import TaskType
from src.db.models.impl.task.enums import TaskStatus


class TaskOperatorBase(ABC):
Expand Down Expand Up @@ -60,7 +61,7 @@ async def inner_task_logic(self) -> None:
raise NotImplementedError

async def handle_task_error(self, e):
await self.adb_client.update_task_status(task_id=self.task_id, status=BatchStatus.ERROR)
await self.adb_client.update_task_status(task_id=self.task_id, status=TaskStatus.ERROR)
await self.adb_client.add_task_error(
task_id=self.task_id,
error=str(e)
Expand Down
6 changes: 4 additions & 2 deletions src/core/tasks/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from src.core.tasks.url.enums import TaskOperatorOutcome
from src.db.client.async_ import AsyncDatabaseClient
from src.db.enums import TaskType
from src.db.models.impl.task.enums import TaskStatus


class TaskHandler:
Expand Down Expand Up @@ -42,13 +43,14 @@ async def handle_outcome(self, run_info: TaskOperatorRunInfo): #
case TaskOperatorOutcome.SUCCESS:
await self.adb_client.update_task_status(
task_id=run_info.task_id,
status=BatchStatus.READY_TO_LABEL
status=TaskStatus.COMPLETE
)

async def handle_task_error(self, run_info: TaskOperatorRunInfo): #
await self.adb_client.update_task_status(
task_id=run_info.task_id,
status=BatchStatus.ERROR)
status=TaskStatus.ERROR
)
await self.adb_client.add_task_error(
task_id=run_info.task_id,
error=run_info.message
Expand Down
13 changes: 9 additions & 4 deletions src/core/tasks/scheduled/impl/delete_logs/operator.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
import datetime

Check warning on line 1 in src/core/tasks/scheduled/impl/delete_logs/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_logs/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/delete_logs/operator.py:1:1: D100 Missing docstring in public module

from sqlalchemy import delete

from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase
from src.db.client.async_ import AsyncDatabaseClient
from src.db.enums import TaskType
from src.db.models.impl.log.sqlalchemy import Log


class DeleteOldLogsTaskOperator(ScheduledTaskOperatorBase):

def __init__(self, adb_client: AsyncDatabaseClient):
super().__init__(adb_client)

@property
def task_type(self) -> TaskType:
return TaskType.DELETE_OLD_LOGS

async def inner_task_logic(self) -> None:
await self.adb_client.delete_old_logs()
statement = delete(Log).where(
Log.created_at < datetime.datetime.now() - datetime.timedelta(days=7)
)
await self.adb_client.execute(statement)

Check warning on line 21 in src/core/tasks/scheduled/impl/delete_logs/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_logs/operator.py#L21 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/delete_logs/operator.py:21:49: W292 no newline at end of file
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from src.core.tasks.scheduled.impl.delete_stale_screenshots.query import DeleteStaleScreenshotsQueryBuilder

Check warning on line 1 in src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py:1:1: D100 Missing docstring in public module
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase
from src.db.enums import TaskType


class DeleteStaleScreenshotsTaskOperator(ScheduledTaskOperatorBase):

Check warning on line 6 in src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py#L6 <101>

Missing docstring in public class
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py:6:1: D101 Missing docstring in public class

@property
def task_type(self) -> TaskType:

Check warning on line 9 in src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py#L9 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py:9:1: D102 Missing docstring in public method
return TaskType.DELETE_STALE_SCREENSHOTS

async def inner_task_logic(self) -> None:

Check warning on line 12 in src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py#L12 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py:12:1: D102 Missing docstring in public method
await self.adb_client.run_query_builder(
DeleteStaleScreenshotsQueryBuilder()
)

Check warning on line 15 in src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py#L15 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/operator.py:15:10: W292 no newline at end of file
29 changes: 29 additions & 0 deletions src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from typing import Any

Check warning on line 1 in src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py:1:1: D100 Missing docstring in public module

from sqlalchemy import delete, exists, select
from sqlalchemy.ext.asyncio import AsyncSession

from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated
from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot
from src.db.queries.base.builder import QueryBuilderBase


class DeleteStaleScreenshotsQueryBuilder(QueryBuilderBase):

Check warning on line 11 in src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py#L11 <101>

Missing docstring in public class
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py:11:1: D101 Missing docstring in public class

async def run(self, session: AsyncSession) -> Any:

Check warning on line 13 in src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py#L13 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py:13:1: D102 Missing docstring in public method

statement = (
delete(
URLScreenshot
)
.where(
exists(
select(
FlagURLValidated,
FlagURLValidated.url_id == URLScreenshot.url_id,
)
)
)
)

await session.execute(statement)

Check warning on line 29 in src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py#L29 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/delete_stale_screenshots/query.py:29:41: W292 no newline at end of file
Empty file.
15 changes: 15 additions & 0 deletions src/core/tasks/scheduled/impl/mark_never_completed/operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from src.core.tasks.scheduled.impl.mark_never_completed.query import MarkTaskNeverCompletedQueryBuilder

Check warning on line 1 in src/core/tasks/scheduled/impl/mark_never_completed/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/operator.py:1:1: D100 Missing docstring in public module
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase
from src.db.enums import TaskType


class MarkTaskNeverCompletedOperator(ScheduledTaskOperatorBase):

Check warning on line 6 in src/core/tasks/scheduled/impl/mark_never_completed/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/operator.py#L6 <101>

Missing docstring in public class
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/operator.py:6:1: D101 Missing docstring in public class

@property
def task_type(self) -> TaskType:

Check warning on line 9 in src/core/tasks/scheduled/impl/mark_never_completed/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/operator.py#L9 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/operator.py:9:1: D102 Missing docstring in public method
return TaskType.MARK_TASK_NEVER_COMPLETED

async def inner_task_logic(self) -> None:

Check warning on line 12 in src/core/tasks/scheduled/impl/mark_never_completed/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/operator.py#L12 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/operator.py:12:1: D102 Missing docstring in public method
await self.adb_client.run_query_builder(
MarkTaskNeverCompletedQueryBuilder()
)

Check warning on line 15 in src/core/tasks/scheduled/impl/mark_never_completed/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/operator.py#L15 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/operator.py:15:10: W292 no newline at end of file
27 changes: 27 additions & 0 deletions src/core/tasks/scheduled/impl/mark_never_completed/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from datetime import timedelta, datetime

Check warning on line 1 in src/core/tasks/scheduled/impl/mark_never_completed/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/query.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/query.py:1:1: D100 Missing docstring in public module
from typing import Any

from sqlalchemy import update
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.enums import BatchStatus
from src.db.enums import TaskType

Check warning on line 8 in src/core/tasks/scheduled/impl/mark_never_completed/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/query.py#L8 <401>

'src.db.enums.TaskType' imported but unused
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/query.py:8:1: F401 'src.db.enums.TaskType' imported but unused
from src.db.models.impl.task.core import Task
from src.db.queries.base.builder import QueryBuilderBase


class MarkTaskNeverCompletedQueryBuilder(QueryBuilderBase):

Check warning on line 13 in src/core/tasks/scheduled/impl/mark_never_completed/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/query.py#L13 <101>

Missing docstring in public class
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/query.py:13:1: D101 Missing docstring in public class

async def run(self, session: AsyncSession) -> Any:

Check warning on line 15 in src/core/tasks/scheduled/impl/mark_never_completed/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/query.py#L15 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/query.py:15:1: D102 Missing docstring in public method
statement = (
update(
Task
).values(
task_status=BatchStatus.ABORTED.value
).
where(
Task.task_status == BatchStatus.IN_PROCESS,
Task.updated_at < datetime.now() - timedelta(hours=1)
)
)
await session.execute(statement)

Check warning on line 27 in src/core/tasks/scheduled/impl/mark_never_completed/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/mark_never_completed/query.py#L27 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/mark_never_completed/query.py:27:41: W292 no newline at end of file
33 changes: 22 additions & 11 deletions src/core/tasks/scheduled/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
from src.core.tasks.scheduled.enums import IntervalEnum
from src.core.tasks.scheduled.impl.backlog.operator import PopulateBacklogSnapshotTaskOperator
from src.core.tasks.scheduled.impl.delete_logs.operator import DeleteOldLogsTaskOperator
from src.core.tasks.scheduled.impl.delete_stale_screenshots.operator import DeleteStaleScreenshotsTaskOperator
from src.core.tasks.scheduled.impl.huggingface.operator import PushToHuggingFaceTaskOperator
from src.core.tasks.scheduled.impl.internet_archives.probe.operator import InternetArchivesProbeTaskOperator
from src.core.tasks.scheduled.impl.internet_archives.save.operator import InternetArchivesSaveTaskOperator
from src.core.tasks.scheduled.impl.mark_never_completed.operator import MarkTaskNeverCompletedOperator
from src.core.tasks.scheduled.impl.mark_never_completed.query import MarkTaskNeverCompletedQueryBuilder

Check warning on line 12 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L12 <401>

'src.core.tasks.scheduled.impl.mark_never_completed.query.MarkTaskNeverCompletedQueryBuilder' imported but unused
Raw output
./src/core/tasks/scheduled/loader.py:12:1: F401 'src.core.tasks.scheduled.impl.mark_never_completed.query.MarkTaskNeverCompletedQueryBuilder' imported but unused
from src.core.tasks.scheduled.impl.run_url_tasks.operator import RunURLTasksTaskOperator
from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry
from src.db.client.async_ import AsyncDatabaseClient
Expand Down Expand Up @@ -37,6 +40,9 @@
self.env = Env()
self.env.read_env()

def setup_flag(self, name: str) -> bool:

Check warning on line 43 in src/core/tasks/scheduled/loader.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/loader.py#L43 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/loader.py:43:1: D102 Missing docstring in public method
return self.env.bool(name, default=True)


async def load_entries(self) -> list[ScheduledTaskEntry]:
scheduled_task_flag = self.env.bool("SCHEDULED_TASKS_FLAG", default=True)
Expand All @@ -52,45 +58,50 @@
ia_client=self.ia_client
),
interval_minutes=IntervalEnum.TEN_MINUTES.value,
enabled=self.env.bool("IA_PROBE_TASK_FLAG", default=True),
enabled=self.setup_flag("IA_PROBE_TASK_FLAG"),
),
ScheduledTaskEntry(
operator=InternetArchivesSaveTaskOperator(
adb_client=self.adb_client,
ia_client=self.ia_client
),
interval_minutes=IntervalEnum.TEN_MINUTES.value,
enabled=self.env.bool("IA_SAVE_TASK_FLAG", default=True),
enabled=self.setup_flag("IA_SAVE_TASK_FLAG"),
),
ScheduledTaskEntry(
operator=DeleteOldLogsTaskOperator(adb_client=self.adb_client),
interval_minutes=IntervalEnum.DAILY.value,
enabled=self.env.bool("DELETE_OLD_LOGS_TASK_FLAG", default=True)
enabled=self.setup_flag("DELETE_OLD_LOGS_TASK_FLAG")
),
ScheduledTaskEntry(
operator=RunURLTasksTaskOperator(async_core=self.async_core),
interval_minutes=self.env.int(
"URL_TASKS_FREQUENCY_MINUTES",
default=IntervalEnum.HOURLY.value
),
enabled=self.env.bool("RUN_URL_TASKS_TASK_FLAG", default=True)

enabled=self.setup_flag("RUN_URL_TASKS_TASK_FLAG")
),
ScheduledTaskEntry(
operator=PopulateBacklogSnapshotTaskOperator(adb_client=self.async_core.adb_client),
interval_minutes=IntervalEnum.DAILY.value,
enabled=self.env.bool("POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG", default=True)
enabled=self.setup_flag("POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG")
),
ScheduledTaskEntry(
operator=PushToHuggingFaceTaskOperator(
adb_client=self.async_core.adb_client,
hf_client=self.hf_client
),
interval_minutes=IntervalEnum.DAILY.value,
enabled=self.env.bool(
"PUSH_TO_HUGGING_FACE_TASK_FLAG",
default=True
)
enabled=self.setup_flag("PUSH_TO_HUGGING_FACE_TASK_FLAG")
),
ScheduledTaskEntry(
operator=MarkTaskNeverCompletedOperator(adb_client=self.adb_client),
interval_minutes=IntervalEnum.DAILY.value,
enabled=self.setup_flag("MARK_TASK_NEVER_COMPLETED_TASK_FLAG")
),
ScheduledTaskEntry(
operator=DeleteStaleScreenshotsTaskOperator(adb_client=self.adb_client),
interval_minutes=IntervalEnum.DAILY.value,
enabled=self.setup_flag("DELETE_STALE_SCREENSHOTS_TASK_FLAG")
)

]
Loading