Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ENV.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Note that some tasks/subtasks are themselves enabled by other tasks.
| `IA_SAVE_TASK_FLAG` | Saves URLs to Internet Archives. |
| `MARK_TASK_NEVER_COMPLETED_TASK_FLAG` | Marks tasks that were started but never completed (usually due to a restart). |
| `DELETE_STALE_SCREENSHOTS_TASK_FLAG` | Deletes stale screenshots for URLs already validated. |
| `TASK_CLEANUP_TASK_FLAG` | Deletes tasks and URL error info records that have not been updated in the last seven days. |

### URL Task Flags

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Add task cleanup task

Revision ID: c5c20af87511
Revises: 241fd3925f5d
Create Date: 2025-10-03 15:46:00.212674

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_10_03_1546-c5c20af87511_add_task_cleanup_task.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_10_03_1546-c5c20af87511_add_task_cleanup_task.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_10_03_1546-c5c20af87511_add_task_cleanup_task.py:11:1: F401 'sqlalchemy as sa' imported but unused


# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the revision it revises.
revision: str = 'c5c20af87511'
down_revision: Union[str, None] = '241fd3925f5d'
# No branch labels or cross-revision dependencies are declared for this migration.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add the 'Task Cleanup' label to the ``task_type`` enum.

    ``IF NOT EXISTS`` makes the statement safe to re-run: without it,
    ``ALTER TYPE ... ADD VALUE`` raises an error when the label is already
    present (for example after a partially applied or manually patched
    migration).
    """
    op.execute(
        "ALTER TYPE task_type ADD VALUE IF NOT EXISTS 'Task Cleanup'"
    )


def downgrade() -> None:
    """Leave the ``task_type`` enum unchanged (intentional no-op).

    PostgreSQL offers no ``ALTER TYPE ... DROP VALUE``; removing an enum
    label would require recreating the type and rewriting every column
    that uses it. The 'Task Cleanup' label added by ``upgrade`` is
    therefore left in place when downgrading.
    """
Empty file.
15 changes: 15 additions & 0 deletions src/core/tasks/scheduled/impl/task_cleanup/operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from src.core.tasks.scheduled.impl.task_cleanup.query import TaskCleanupQueryBuilder

Check warning on line 1 in src/core/tasks/scheduled/impl/task_cleanup/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/task_cleanup/operator.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/task_cleanup/operator.py:1:1: D100 Missing docstring in public module
from src.core.tasks.scheduled.templates.operator import ScheduledTaskOperatorBase
from src.db.enums import TaskType


class TaskCleanupOperator(ScheduledTaskOperatorBase):
    """Scheduled operator that runs ``TaskCleanupQueryBuilder`` via ``self.adb_client``."""

    @property
    def task_type(self) -> TaskType:
        """Return the task type of this operator: ``TaskType.TASK_CLEANUP``."""
        return TaskType.TASK_CLEANUP

    async def inner_task_logic(self) -> None:
        """Build a ``TaskCleanupQueryBuilder`` and hand it to the database client."""
        cleanup_query = TaskCleanupQueryBuilder()
        await self.adb_client.run_query_builder(cleanup_query)

Check warning on line 15 in src/core/tasks/scheduled/impl/task_cleanup/operator.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/task_cleanup/operator.py#L15 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/task_cleanup/operator.py:15:10: W292 no newline at end of file
33 changes: 33 additions & 0 deletions src/core/tasks/scheduled/impl/task_cleanup/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from datetime import timedelta, datetime

Check warning on line 1 in src/core/tasks/scheduled/impl/task_cleanup/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/task_cleanup/query.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/task_cleanup/query.py:1:1: D100 Missing docstring in public module
from typing import Any

from sqlalchemy import delete
from sqlalchemy.ext.asyncio import AsyncSession

from src.db.models.impl.task.core import Task
from src.db.models.impl.task.error import TaskError

Check warning on line 8 in src/core/tasks/scheduled/impl/task_cleanup/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/task_cleanup/query.py#L8 <401>

'src.db.models.impl.task.error.TaskError' imported but unused
Raw output
./src/core/tasks/scheduled/impl/task_cleanup/query.py:8:1: F401 'src.db.models.impl.task.error.TaskError' imported but unused
from src.db.models.impl.url.error_info.sqlalchemy import URLErrorInfo
from src.db.queries.base.builder import QueryBuilderBase


class TaskCleanupQueryBuilder(QueryBuilderBase):
    """Query builder that deletes ``URLErrorInfo`` and ``Task`` rows older than a week."""

    async def run(self, session: AsyncSession) -> Any:
        """Delete rows whose ``updated_at`` is more than seven days in the past.

        Two DELETE statements are executed on ``session``: first against
        ``URLErrorInfo``, then against ``Task``. Nothing is returned.
        """
        # NOTE(review): datetime.now() yields a naive local-time value —
        # confirm this matches how the `updated_at` columns are stored.
        cutoff: datetime = datetime.now() - timedelta(days=7)

        # Order is preserved from the original: URLErrorInfo rows go before
        # Task rows (presumably because of a relationship between the two —
        # verify against the models before reordering).
        for model in (URLErrorInfo, Task):
            stale_rows = delete(model).where(model.updated_at < cutoff)
            await session.execute(stale_rows)

Check warning on line 33 in src/core/tasks/scheduled/impl/task_cleanup/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/task_cleanup/query.py#L33 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/task_cleanup/query.py:33:41: W292 no newline at end of file
6 changes: 6 additions & 0 deletions src/core/tasks/scheduled/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from src.core.tasks.scheduled.impl.mark_never_completed.operator import MarkTaskNeverCompletedOperator
from src.core.tasks.scheduled.impl.mark_never_completed.query import MarkTaskNeverCompletedQueryBuilder
from src.core.tasks.scheduled.impl.run_url_tasks.operator import RunURLTasksTaskOperator
from src.core.tasks.scheduled.impl.task_cleanup.operator import TaskCleanupOperator
from src.core.tasks.scheduled.models.entry import ScheduledTaskEntry
from src.db.client.async_ import AsyncDatabaseClient
from src.external.huggingface.hub.client import HuggingFaceHubClient
Expand Down Expand Up @@ -103,5 +104,10 @@ async def load_entries(self) -> list[ScheduledTaskEntry]:
operator=DeleteStaleScreenshotsTaskOperator(adb_client=self.adb_client),
interval_minutes=IntervalEnum.DAILY.value,
enabled=self.setup_flag("DELETE_STALE_SCREENSHOTS_TASK_FLAG")
),
ScheduledTaskEntry(
operator=TaskCleanupOperator(adb_client=self.adb_client),
interval_minutes=IntervalEnum.DAILY.value,
enabled=self.setup_flag("TASK_CLEANUP_TASK_FLAG")
)
]
1 change: 1 addition & 0 deletions src/db/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class TaskType(PyEnum):
DELETE_STALE_SCREENSHOTS = "Delete Stale Screenshots"
MARK_TASK_NEVER_COMPLETED = "Mark Task Never Completed"
RUN_URL_TASKS = "Run URL Task Cycles"
TASK_CLEANUP = "Task Cleanup"

class ChangeLogOperationType(PyEnum):
INSERT = "INSERT"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from src.core.tasks.scheduled.loader import ScheduledTaskOperatorLoader

NUMBER_OF_ENTRIES = 8
NUMBER_OF_ENTRIES = 9

@pytest.mark.asyncio
async def test_happy_path(
Expand Down