Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from src.core.tasks.scheduled.impl.internet_archives.probe.convert import convert_ia_url_mapping_to_ia_metadata
from src.core.tasks.scheduled.impl.internet_archives.probe.filter import filter_into_subsets
from src.core.tasks.scheduled.impl.internet_archives.probe.models.subset import IAURLMappingSubsets
from src.core.tasks.scheduled.impl.internet_archives.probe.queries.delete import \
DeleteOldUnsuccessfulIACheckedFlagsQueryBuilder
from src.core.tasks.scheduled.impl.internet_archives.probe.queries.get import GetURLsForInternetArchivesTaskQueryBuilder
from src.core.tasks.scheduled.impl.internet_archives.probe.queries.prereq import \
CheckURLInternetArchivesTaskPrerequisitesQueryBuilder
Expand Down Expand Up @@ -45,6 +47,10 @@ async def meets_task_prerequisites(self) -> bool:
)

async def inner_task_logic(self) -> None:
await self.adb_client.run_query_builder(
DeleteOldUnsuccessfulIACheckedFlagsQueryBuilder()
)

url_mappings: list[URLMapping] = await self._get_url_mappings()
if len(url_mappings) == 0:
return
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from sqlalchemy import select, or_, exists, func, text, CTE, ColumnElement

Check warning on line 1 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py:1:1: D100 Missing docstring in public module

from src.db.helpers.query import not_exists_url
from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives
from src.db.models.impl.url.core.sqlalchemy import URL


class CheckURLInternetArchivesCTEContainer:

Check warning on line 8 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py#L8 <101>

Missing docstring in public class
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py:8:1: D101 Missing docstring in public class

def __init__(self):

Check warning on line 10 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py#L10 <107>

Missing docstring in __init__
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py:10:1: D107 Missing docstring in __init__

# Build the CTE (named "check_url_internet_archives_prereq") exposing url_id and url
# for every URL row that passes the OR filter below.
self._cte = (
select(
URL.id.label("url_id"),
URL.url
)
.where(
or_(
# Branch 1: project helper applied to the flag model.
# NOTE(review): presumably true when the URL has no flag row at all — confirm in src.db.helpers.query.
not_exists_url(FlagURLCheckedForInternetArchives),
# Branch 2: a flag row exists for this URL that is marked unsuccessful
# and whose created_at is earlier than now() minus one week.
exists(
select(FlagURLCheckedForInternetArchives.url_id)
.where(
FlagURLCheckedForInternetArchives.url_id == URL.id,
~FlagURLCheckedForInternetArchives.success,
# text() passes the interval literal through to the database as raw SQL.
FlagURLCheckedForInternetArchives.created_at < func.now() - text("INTERVAL '1 week'")
)
)
)
).cte("check_url_internet_archives_prereq")
)

@property
def cte(self) -> CTE:

Check warning on line 33 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py#L33 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py:33:1: D102 Missing docstring in public method
return self._cte

@property
def url_id(self) -> ColumnElement[int]:

Check warning on line 37 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py#L37 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py:37:1: D102 Missing docstring in public method
return self._cte.c.url_id

@property
def url(self) -> ColumnElement[str]:

Check warning on line 41 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py#L41 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py:41:1: D102 Missing docstring in public method
return self._cte.c.url

Check warning on line 42 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py#L42 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/cte.py:42:31: W292 no newline at end of file
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from sqlalchemy import delete, exists, select

Check warning on line 1 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py:1:1: D100 Missing docstring in public module
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.impl.internet_archives.probe.queries.cte import CheckURLInternetArchivesCTEContainer
from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives
from src.db.queries.base.builder import QueryBuilderBase

class DeleteOldUnsuccessfulIACheckedFlagsQueryBuilder(QueryBuilderBase):

Check warning on line 8 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py#L8 <101>

Missing docstring in public class
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py:8:1: D101 Missing docstring in public class

async def run(self, session: AsyncSession) -> None:

Check warning on line 10 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py#L10 <102>

Missing docstring in public method
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py:10:1: D102 Missing docstring in public method
# Reuse the CTE container so the delete targets the URL set that the container defines.
cte = CheckURLInternetArchivesCTEContainer()
# Delete every flag row whose url_id matches a url_id produced by the CTE
# (EXISTS subquery correlated on the flag row being deleted).
query = (
delete(FlagURLCheckedForInternetArchives)
.where(
exists(
select(cte.url_id)
.where(
FlagURLCheckedForInternetArchives.url_id == cte.url_id,
)
)
)
)

await session.execute(query)

Check warning on line 24 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py#L24 <292>

no newline at end of file
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/delete.py:24:37: W292 no newline at end of file
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from sqlalchemy import select
from sqlalchemy import select, or_, exists, text, func

Check warning on line 1 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py#L1 <100>

Missing docstring in public module
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py:1:1: D100 Missing docstring in public module

Check warning on line 1 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py#L1 <401>

'sqlalchemy.or_' imported but unused
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py:1:1: F401 'sqlalchemy.or_' imported but unused

Check warning on line 1 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py#L1 <401>

'sqlalchemy.exists' imported but unused
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py:1:1: F401 'sqlalchemy.exists' imported but unused

Check warning on line 1 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py#L1 <401>

'sqlalchemy.text' imported but unused
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py:1:1: F401 'sqlalchemy.text' imported but unused

Check warning on line 1 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py#L1 <401>

'sqlalchemy.func' imported but unused
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py:1:1: F401 'sqlalchemy.func' imported but unused
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.impl.internet_archives.probe.queries.cte import CheckURLInternetArchivesCTEContainer
from src.db.dtos.url.mapping import URLMapping
from src.db.helpers.query import not_exists_url

Check warning on line 6 in src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py#L6 <401>

'src.db.helpers.query.not_exists_url' imported but unused
Raw output
./src/core/tasks/scheduled/impl/internet_archives/probe/queries/get.py:6:1: F401 'src.db.helpers.query.not_exists_url' imported but unused
from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.queries.base.builder import QueryBuilderBase
Expand All @@ -11,23 +13,19 @@
class GetURLsForInternetArchivesTaskQueryBuilder(QueryBuilderBase):

async def run(self, session: AsyncSession) -> list[URLMapping]:
cte = CheckURLInternetArchivesCTEContainer()
query = (
select(
URL.id,
URL.url
cte.url_id,
cte.url
)
.outerjoin(
FlagURLCheckedForInternetArchives,
URL.id == FlagURLCheckedForInternetArchives.url_id
)
.where(FlagURLCheckedForInternetArchives.url_id.is_(None))
.limit(100)
)

db_mappings = await sh.mappings(session, query=query)
return [
URLMapping(
url_id=mapping["id"],
url_id=mapping["url_id"],
url=mapping["url"]
) for mapping in db_mappings
]
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from src.core.tasks.scheduled.impl.internet_archives.probe.queries.cte import CheckURLInternetArchivesCTEContainer
from src.db.helpers.query import not_exists_url
from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives
from src.db.models.impl.url.core.sqlalchemy import URL
Expand All @@ -11,12 +12,8 @@
class CheckURLInternetArchivesTaskPrerequisitesQueryBuilder(QueryBuilderBase):

async def run(self, session: AsyncSession) -> bool:
cte = CheckURLInternetArchivesCTEContainer()
query = (
select(URL)
.where(
not_exists_url(FlagURLCheckedForInternetArchives)
)
.limit(1)
select(cte.url_id)
)
result = await sh.one_or_none(session, query=query)
return result is not None
return await sh.results_exist(session, query=query)
3 changes: 2 additions & 1 deletion src/db/models/impl/flag/checked_for_ia/sqlalchemy.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from sqlalchemy import PrimaryKeyConstraint
from sqlalchemy.orm import Mapped

from src.db.models.mixins import URLDependentMixin
from src.db.models.mixins import URLDependentMixin, CreatedAtMixin
from src.db.models.templates_.base import Base
from src.db.models.templates_.with_id import WithIDBase


class FlagURLCheckedForInternetArchives(
URLDependentMixin,
CreatedAtMixin,
Base
):

Expand Down