Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Remove URL Error Status

Revision ID: c4edeb795134
Revises: b8a68f4260a4
Create Date: 2025-11-20 15:30:15.783191

"""
from typing import Sequence, Union

from alembic import op

Check warning on line 10 in alembic/versions/2025_11_20_1530-c4edeb795134_remove_url_error_status.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_11_20_1530-c4edeb795134_remove_url_error_status.py#L10 <401>

'alembic.op' imported but unused
Raw output
./alembic/versions/2025_11_20_1530-c4edeb795134_remove_url_error_status.py:10:1: F401 'alembic.op' imported but unused
import sqlalchemy as sa

Check warning on line 11 in alembic/versions/2025_11_20_1530-c4edeb795134_remove_url_error_status.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] alembic/versions/2025_11_20_1530-c4edeb795134_remove_url_error_status.py#L11 <401>

'sqlalchemy as sa' imported but unused
Raw output
./alembic/versions/2025_11_20_1530-c4edeb795134_remove_url_error_status.py:11:1: F401 'sqlalchemy as sa' imported but unused

from src.util.alembic_helpers import remove_enum_value

# Revision identifiers, used by Alembic.
# `revision` and `down_revision` mirror the "Revision ID" and "Revises"
# entries in the module docstring at the top of this file.
revision: str = 'c4edeb795134'
down_revision: Union[str, None] = 'b8a68f4260a4'
# No branch labels or cross-revision dependencies are declared here.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Remove the ``error`` value from the ``url_status`` enum.

    Delegates to ``remove_enum_value`` with ``("urls", "status")`` as the
    only target (presumably a ``(table, column)`` pair -- confirm against
    the helper in ``src.util.alembic_helpers``).
    """
    remove_enum_value(
        enum_name="url_status",
        value_to_remove="error",
        targets=[
            ("urls", "status"),
        ],
    )


def downgrade() -> None:
    """Do nothing: the removed ``error`` enum value is not restored.

    NOTE(review): downgrading past this revision leaves ``url_status``
    without ``error``; confirm that an irreversible migration is intended.
    """
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

from src.api.endpoints.metrics.batches.aggregated.query.models.strategy_count import CountByBatchStrategyResponse
from src.collectors.enums import URLStatus
from src.db.helpers.query import exists_url
from src.db.helpers.session import session_helper as sh
from src.db.models.impl.batch.sqlalchemy import Batch
from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError
from src.db.queries.base.builder import QueryBuilderBase


Expand All @@ -23,7 +25,9 @@ async def run(self, session: AsyncSession) -> list[CountByBatchStrategyResponse]
.select_from(Batch)
.join(LinkBatchURL)
.join(URL)
.where(URL.status == URLStatus.ERROR)
.where(
exists_url(URLTaskError)
)
.group_by(Batch.strategy, URL.status)
)

Expand Down
11 changes: 7 additions & 4 deletions src/api/endpoints/metrics/batches/breakdown/error/cte_.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from sqlalchemy import select, func, CTE, Column
from sqlalchemy import select, func

Check warning on line 1 in src/api/endpoints/metrics/batches/breakdown/error/cte_.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/metrics/batches/breakdown/error/cte_.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/metrics/batches/breakdown/error/cte_.py:1:1: D100 Missing docstring in public module

from src.collectors.enums import URLStatus
from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE
from src.db.helpers.query import exists_url
from src.db.models.impl.batch.sqlalchemy import Batch
from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL
from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError

URL_ERROR_CTE = BatchesBreakdownURLCTE(
select(
Expand All @@ -19,7 +20,9 @@
URL,
URL.id == LinkBatchURL.url_id
)
.where(URL.status == URLStatus.ERROR)
.where(
exists_url(URLTaskError)
)
.group_by(Batch.id)
.cte("error")
)
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from sqlalchemy import select, func

from src.collectors.enums import URLStatus
from src.db.helpers.query import exists_url
from src.db.models.impl.batch.sqlalchemy import Batch
from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError
from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \
URLCountsCTEContainer

Expand All @@ -21,7 +23,7 @@
URL.id == LinkBatchURL.url_id,
)
.where(
URL.status == URLStatus.ERROR
exists_url(URLTaskError)
)
.group_by(
Batch.id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ async def test_get_batch_summaries(api_test_helper):
count=4,
status=URLCreationEnum.NOT_RELEVANT
),
TestURLCreationParameters(
count=3,
status=URLCreationEnum.ERROR
)
]
),
TestBatchCreationParameters(
Expand Down Expand Up @@ -78,10 +74,10 @@ async def test_get_batch_summaries(api_test_helper):
result_2 = results[1]
assert result_2.id == batch_2_id
counts_2 = result_2.url_counts
assert counts_2.total == 7
assert counts_2.total == 4
assert counts_2.not_relevant == 4
assert counts_2.errored == 3
assert counts_2.pending == 3
assert counts_2.errored == 0
assert counts_2.pending == 0
assert counts_2.submitted == 0
assert counts_2.duplicate == 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ async def test_get_batches_aggregated_metrics(
adb_client=adb_client,
strategy=CollectorType.MANUAL,
)
url_mappings_error: list[SimpleURLMapping] = await create_urls(
url_mappings_broken: list[SimpleURLMapping] = await create_urls(
adb_client=adb_client,
status=URLStatus.ERROR,
status=URLStatus.BROKEN,
count=4,
)
url_mappings_ok: list[SimpleURLMapping] = await create_urls(
adb_client=adb_client,
status=URLStatus.OK,
count=11,
)
url_mappings_all: list[SimpleURLMapping] = url_mappings_error + url_mappings_ok
url_mappings_all: list[SimpleURLMapping] = url_mappings_broken + url_mappings_ok
url_ids_all: list[int] = [url_mapping.url_id for url_mapping in url_mappings_all]
await create_batch_url_links(
adb_client=adb_client,
Expand Down Expand Up @@ -88,5 +88,5 @@ async def test_get_batches_aggregated_metrics(
assert inner_dto_manual.count_urls_pending == 15
assert inner_dto_manual.count_urls_submitted == 6
assert inner_dto_manual.count_urls_rejected == 9
assert inner_dto_manual.count_urls_errors == 12
assert inner_dto_manual.count_urls_errors == 0 # TODO: Change by adding URL Task Errors
assert inner_dto_manual.count_urls_validated == 30
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,6 @@ async def test_get_batches_breakdown_metrics(api_test_helper):
strategy=CollectorType.AUTO_GOOGLER,
date_generated=today - timedelta(days=14)
)
error_url_mappings: list[SimpleURLMapping] = await create_urls(
adb_client=adb_client,
status=URLStatus.ERROR,
count=4,
)
error_url_ids: list[int] = [url_mapping.url_id for url_mapping in error_url_mappings]
validated_url_mappings: list[SimpleURLMapping] = await create_urls(
adb_client=adb_client,
count=8,
Expand All @@ -73,7 +67,7 @@ async def test_get_batches_breakdown_metrics(api_test_helper):
await create_batch_url_links(
adb_client=adb_client,
batch_id=batch_id_3,
url_ids=error_url_ids + validated_url_ids,
url_ids=validated_url_ids,
)


Expand Down Expand Up @@ -107,11 +101,11 @@ async def test_get_batches_breakdown_metrics(api_test_helper):
assert dto_batch_3.batch_id == batch_id_3
assert dto_batch_3.status == BatchStatus.READY_TO_LABEL
assert dto_batch_3.strategy == CollectorType.AUTO_GOOGLER
assert dto_batch_3.count_url_total == 12
assert dto_batch_3.count_url_pending == 5
assert dto_batch_3.count_url_total == 8
assert dto_batch_3.count_url_pending == 1
assert dto_batch_3.count_url_submitted == 0
assert dto_batch_3.count_url_rejected == 3
assert dto_batch_3.count_url_error == 4
assert dto_batch_3.count_url_error == 0
assert dto_batch_3.count_url_validated == 7

dto_2 = await ath.request_validator.get_batches_breakdown_metrics(
Expand Down
6 changes: 0 additions & 6 deletions tests/automated/integration/api/metrics/test_backlog.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,6 @@ async def test_get_backlog_metrics(api_test_helper):
url_ids=not_relevant_url_ids_2[:4],
validation_type=URLType.NOT_RELEVANT
)
error_url_mappings_2: list[SimpleURLMapping] = await ddc.create_urls(
status=URLStatus.ERROR,
count=2
)
error_url_ids_2: list[int] = [url_mapping.url_id for url_mapping in error_url_mappings_2]
await ddc.create_batch_url_links(url_ids=error_url_ids_2, batch_id=batch_2_id)

await adb_client.populate_backlog_snapshot(
dt=today.subtract(months=2).naive()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ async def test_get_urls_aggregated_metrics(api_test_helper):
strategy=CollectorType.AUTO_GOOGLER,
)
url_mappings_2_ok: list[SimpleURLMapping] = await ddc.create_urls(batch_id=batch_2, count=4, status=URLStatus.OK)
url_mappings_2_error: list[SimpleURLMapping] = await ddc.create_urls(batch_id=batch_2, count=2, status=URLStatus.ERROR)
url_mappings_2_validated: list[SimpleURLMapping] = await ddc.create_validated_urls(count=1, validation_type=URLType.DATA_SOURCE)
url_mappings_2_not_relevant: list[SimpleURLMapping] = await ddc.create_validated_urls(count=5, validation_type=URLType.NOT_RELEVANT)
url_ids_2_validated: list[int] = [url_mapping.url_id for url_mapping in url_mappings_2_validated]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ async def test_get_urls_breakdown_pending_metrics(api_test_helper):
count=3,
status=URLCreationEnum.SUBMITTED
),
TestURLCreationParameters(
count=4,
status=URLCreationEnum.ERROR
),
TestURLCreationParameters(
count=5,
status=URLCreationEnum.OK,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@ async def test_get_urls_breakdown_submitted_metrics(api_test_helper):
count=3,
status=URLCreationEnum.SUBMITTED
),
TestURLCreationParameters(
count=4,
status=URLCreationEnum.ERROR
),
TestURLCreationParameters(
count=5,
status=URLCreationEnum.VALIDATED
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
TestURLHTMLTaskSetupEntry(
url_info=TestURLInfo(
url="not-found-path.com/submitted",
status=URLStatus.ERROR
status=URLStatus.OK
),
web_metadata_info=TestWebMetadataInfo(
accessed=True,
Expand All @@ -48,7 +48,7 @@
TestURLHTMLTaskSetupEntry(
url_info=TestURLInfo(
url="error-path.com/submitted",
status=URLStatus.ERROR
status=URLStatus.OK
),
web_metadata_info=TestWebMetadataInfo(
accessed=True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def convert_url_creation_enum_to_url_status(url_creation_enum: URLCreationEnum)
case URLCreationEnum.NOT_RELEVANT:
return URLStatus.OK
case URLCreationEnum.ERROR:
return URLStatus.ERROR
raise ValueError("Invalid URL Status")
case URLCreationEnum.DUPLICATE:
return URLStatus.DUPLICATE
case _:
Expand Down
3 changes: 1 addition & 2 deletions tests/manual/agency_identifier/test_nlp_processor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pytest

from src.core.tasks.url.operators.agency_identification.subtasks.impl.nlp_location_match_.processor import \
NLPProcessor
from src.core.tasks.url.operators.location_id.subtasks.impl.nlp_location_freq.processor.nlp.core import NLPProcessor

SAMPLE_HTML: str = """
<html>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
import dotenv

from src.db.models.impl.batch.pydantic.info import BatchInfo
from src.collectors import CollectorType
from src.collectors.enums import CollectorType
from src.core.enums import BatchStatus
from test_automated.integration.core.helpers.common_test_procedures import run_collector_and_wait_for_completion


def test_auto_googler_collector_lifecycle(test_core):
Expand Down