Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
"""Add missing location/agency URL status

Revision ID: c4f9bbf8a201
Revises: 1fb2286a016c
Create Date: 2026-02-27 12:00:00.000000

"""
from typing import Sequence, Union

from alembic import op


# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration
# it revises; both match the "Revision ID" / "Revises" lines in the module
# docstring above.
revision: str = "c4f9bbf8a201"
down_revision: Union[str, None] = "1fb2286a016c"
# Neither branch labels nor `depends_on` entries are set for this migration.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def _create_url_status_mat_view() -> None:
    """Create ``url_status_mat_view`` including the missing location/agency status.

    The view yields one ``(url_id, status, code)`` row per row produced by the
    ``status_text`` CTE. The status is the first matching rule, in this order:

    1. ``Accepted`` (300): validated as ``not relevant``, ``individual record``
       or ``not found``.
    2. ``Awaiting Submission`` (380): validated as ``data source`` / ``meta url``
       with no matching ``ds_app_link_*`` row.
    3. ``Submitted`` (390): validated as ``data source`` / ``meta url`` with a
       matching ``ds_app_link_*`` row.
    4. ``Missing Location / Agency`` (320): not validated and present in
       ``unresolved_missing_queue`` (suspended URLs with at least two
       location-not-found or agency-not-found suggestions).
    5. ``Community Labeling`` (200): compressed HTML, web metadata and a
       screenshot all exist.
    6. ``Error`` (110): a ``Screenshot``, ``HTML`` or ``URL Probe`` task error
       exists.
    7. ``Intake`` (100): everything else.

    Any status outside this list maps to code ``-1``.

    NOTE(review): ``unresolved_missing_queue`` counts over the product of two
    LEFT JOINs, so the raw counts can exceed the number of reports per table.
    For the ``>= 2`` threshold the outcome is the same as counting each table
    separately, provided each link table holds at most one row per
    ``(url_id, user_id)`` -- confirm against the table constraints.
    """
    # The SQL text is kept flush-left so the statement sent to the database
    # is exactly the text of the original literal.
    create_view_sql = """
CREATE MATERIALIZED VIEW url_status_mat_view AS
WITH
urls_with_relevant_errors AS (
SELECT
ute.url_id
FROM
url_task_error ute
WHERE
ute.task_type = ANY (
ARRAY[
'Screenshot'::task_type,
'HTML'::task_type,
'URL Probe'::task_type
]
)
),
unresolved_missing_queue AS (
SELECT
fus.url_id
FROM
flag_url_suspended fus
LEFT JOIN link_user_suggestion_location_not_found luslnf
ON luslnf.url_id = fus.url_id
LEFT JOIN link_user_suggestion_agency_not_found lusanf
ON lusanf.url_id = fus.url_id
GROUP BY
fus.url_id
HAVING
count(luslnf.user_id) >= 2
OR count(lusanf.user_id) >= 2
),
status_text AS (
SELECT
u.id AS url_id,
CASE
WHEN fuv.type = ANY (
ARRAY[
'not relevant'::url_type,
'individual record'::url_type,
'not found'::url_type
]
) THEN 'Accepted'::text
WHEN (
fuv.type = 'data source'::url_type
AND uds.url_id IS NULL
) OR (
fuv.type = 'meta url'::url_type
AND udmu.url_id IS NULL
) THEN 'Awaiting Submission'::text
WHEN (
fuv.type = 'data source'::url_type
AND uds.url_id IS NOT NULL
) OR (
fuv.type = 'meta url'::url_type
AND udmu.url_id IS NOT NULL
) THEN 'Submitted'::text
WHEN fuv.type IS NULL AND umq.url_id IS NOT NULL THEN 'Missing Location / Agency'::text
WHEN uch.url_id IS NOT NULL
AND uwm.url_id IS NOT NULL
AND us.url_id IS NOT NULL THEN 'Community Labeling'::text
WHEN uwre.url_id IS NOT NULL THEN 'Error'::text
ELSE 'Intake'::text
END AS status
FROM
urls u
LEFT JOIN urls_with_relevant_errors uwre
ON u.id = uwre.url_id
LEFT JOIN url_screenshot us
ON u.id = us.url_id
LEFT JOIN url_compressed_html uch
ON u.id = uch.url_id
LEFT JOIN url_web_metadata uwm
ON u.id = uwm.url_id
LEFT JOIN flag_url_validated fuv
ON u.id = fuv.url_id
LEFT JOIN ds_app_link_meta_url udmu
ON u.id = udmu.url_id
LEFT JOIN ds_app_link_data_source uds
ON u.id = uds.url_id
LEFT JOIN unresolved_missing_queue umq
ON u.id = umq.url_id
)
SELECT
status_text.url_id,
status_text.status,
CASE status_text.status
WHEN 'Intake'::text THEN 100
WHEN 'Error'::text THEN 110
WHEN 'Community Labeling'::text THEN 200
WHEN 'Accepted'::text THEN 300
WHEN 'Missing Location / Agency'::text THEN 320
WHEN 'Awaiting Submission'::text THEN 380
WHEN 'Submitted'::text THEN 390
ELSE '-1'::integer
END AS code
FROM
status_text;
"""
    op.execute(create_view_sql)


def upgrade() -> None:
    """Rebuild ``url_status_mat_view`` with the missing location/agency status.

    The existing materialized view (if any) is dropped and then recreated by
    ``_create_url_status_mat_view``.

    NOTE(review): objects defined on the old view (for example indexes) go
    away with the drop -- confirm nothing needs to be recreated here.
    """
    drop_view_sql = "DROP MATERIALIZED VIEW IF EXISTS url_status_mat_view"
    op.execute(drop_view_sql)
    _create_url_status_mat_view()


def downgrade() -> None:
    """Restore ``url_status_mat_view`` without the missing location/agency status.

    Drops the current materialized view (if present) and recreates it from a
    definition that has no ``unresolved_missing_queue`` CTE and no
    ``Missing Location / Agency`` status (code 320). The remaining statuses and
    codes are: Intake 100, Error 110, Community Labeling 200, Accepted 300,
    Awaiting Submission 380, Submitted 390, anything else -1.
    """
    drop_view_sql = "DROP MATERIALIZED VIEW IF EXISTS url_status_mat_view"
    # The SQL text is kept flush-left so the statement sent to the database
    # is exactly the text of the original literal.
    previous_view_sql = """
CREATE MATERIALIZED VIEW url_status_mat_view AS
WITH
urls_with_relevant_errors AS (
SELECT
ute.url_id
FROM
url_task_error ute
WHERE
ute.task_type = ANY (ARRAY ['Screenshot'::task_type, 'HTML'::task_type, 'URL Probe'::task_type])
),
status_text AS (
SELECT
u.id AS url_id,
CASE
WHEN fuv.type = ANY (
ARRAY['not relevant'::url_type, 'individual record'::url_type, 'not found'::url_type]
) THEN 'Accepted'::text
WHEN fuv.type = 'data source'::url_type AND uds.url_id IS NULL OR
fuv.type = 'meta url'::url_type AND udmu.url_id IS NULL THEN 'Awaiting Submission'::text
WHEN fuv.type = 'data source'::url_type AND uds.url_id IS NOT NULL OR
fuv.type = 'meta url'::url_type AND udmu.url_id IS NOT NULL THEN 'Submitted'::text
WHEN uch.url_id IS NOT NULL AND uwm.url_id IS NOT NULL AND us.url_id IS NOT NULL
THEN 'Community Labeling'::text
WHEN uwre.url_id IS NOT NULL THEN 'Error'::text
ELSE 'Intake'::text
END AS status
FROM
urls u
LEFT JOIN urls_with_relevant_errors uwre
ON u.id = uwre.url_id
LEFT JOIN url_screenshot us
ON u.id = us.url_id
LEFT JOIN url_compressed_html uch
ON u.id = uch.url_id
LEFT JOIN url_web_metadata uwm
ON u.id = uwm.url_id
LEFT JOIN flag_url_validated fuv
ON u.id = fuv.url_id
LEFT JOIN ds_app_link_meta_url udmu
ON u.id = udmu.url_id
LEFT JOIN ds_app_link_data_source uds
ON u.id = uds.url_id
)
SELECT
status_text.url_id,
status_text.status,
CASE status_text.status
WHEN 'Intake'::text THEN 100
WHEN 'Error'::text THEN 110
WHEN 'Community Labeling'::text THEN 200
WHEN 'Accepted'::text THEN 300
WHEN 'Awaiting Submission'::text THEN 380
WHEN 'Submitted'::text THEN 390
ELSE '-1'::integer
END AS code
FROM
status_text;
"""
    # Drop first, then recreate: same order as before.
    op.execute(drop_view_sql)
    op.execute(previous_view_sql)
1 change: 1 addition & 0 deletions src/api/endpoints/annotate/missing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Check warning on line 1 in src/api/endpoints/annotate/missing/__init__.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/__init__.py#L1 <104>

Missing docstring in public package
Raw output
./src/api/endpoints/annotate/missing/__init__.py:1:1: D104 Missing docstring in public package

Check warning on line 1 in src/api/endpoints/annotate/missing/__init__.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/__init__.py#L1 <391>

blank line at end of file
Raw output
./src/api/endpoints/annotate/missing/__init__.py:1:1: W391 blank line at end of file
1 change: 1 addition & 0 deletions src/api/endpoints/annotate/missing/get/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Check warning on line 1 in src/api/endpoints/annotate/missing/get/__init__.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/get/__init__.py#L1 <104>

Missing docstring in public package
Raw output
./src/api/endpoints/annotate/missing/get/__init__.py:1:1: D104 Missing docstring in public package

Check warning on line 1 in src/api/endpoints/annotate/missing/get/__init__.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/get/__init__.py#L1 <391>

blank line at end of file
Raw output
./src/api/endpoints/annotate/missing/get/__init__.py:1:1: W391 blank line at end of file
20 changes: 20 additions & 0 deletions src/api/endpoints/annotate/missing/get/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from pydantic import BaseModel, Field

Check warning on line 1 in src/api/endpoints/annotate/missing/get/models.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/get/models.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/annotate/missing/get/models.py:1:1: D100 Missing docstring in public module


class MissingAnnotationQueueEntry(BaseModel):
    """One URL in the missing location/agency queue with its report counts."""

    url_id: int = Field(description="The URL ID in the missing location/agency queue.")
    url: str = Field(description="The URL string.")
    missing_location_count: int = Field(description="Number of users who marked the location as missing.")
    missing_agency_count: int = Field(description="Number of users who marked the agency as missing.")


class MissingAnnotationQueueResponse(BaseModel):
    """Container for the entries of the missing location/agency queue."""

    # Queue entries, one per URL.
    entries: list[MissingAnnotationQueueEntry]
86 changes: 86 additions & 0 deletions src/api/endpoints/annotate/missing/get/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from typing import Sequence

Check warning on line 1 in src/api/endpoints/annotate/missing/get/query.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/get/query.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/annotate/missing/get/query.py:1:1: D100 Missing docstring in public module

from sqlalchemy import select, RowMapping, func, or_
from sqlalchemy.ext.asyncio import AsyncSession

from src.api.endpoints.annotate.missing.get.models import MissingAnnotationQueueEntry, MissingAnnotationQueueResponse
from src.db.helpers.session import session_helper as sh
from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended
from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound
from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound
from src.db.models.impl.url.core.sqlalchemy import URL
from src.db.models.views.unvalidated_url import UnvalidatedURL
from src.db.queries.base.builder import QueryBuilderBase


class GetMissingAnnotationQueueQueryBuilder(QueryBuilderBase):
    """Query builder that lists URLs in the missing location/agency queue."""

    def __init__(
        self,
        limit: int = 200,
    ):
        """Remember the maximum number of queue entries to fetch.

        Args:
            limit: Value passed to the query's ``LIMIT`` clause.
        """
        super().__init__()
        self.limit = limit

    async def run(
        self,
        session: AsyncSession
    ) -> MissingAnnotationQueueResponse:
        """Fetch the queue entries, ordered by ascending URL id.

        A URL is selected when it joins to both ``FlagURLSuspended`` and
        ``UnvalidatedURL`` and has at least two location-not-found or at
        least two agency-not-found suggestion rows.

        Args:
            session: Session handed to the ``session_helper.mappings`` call.

        Returns:
            A response with one entry per selected row, at most ``self.limit``
            of them.
        """
        # Correlated counts of "not found" suggestions for the current URL row.
        location_reports = (
            select(func.count(LinkUserSuggestionLocationNotFound.user_id))
            .where(LinkUserSuggestionLocationNotFound.url_id == URL.id)
            .scalar_subquery()
        )
        agency_reports = (
            select(func.count(LinkUserSuggestionAgencyNotFound.user_id))
            .where(LinkUserSuggestionAgencyNotFound.url_id == URL.id)
            .scalar_subquery()
        )

        # Either kind of report reaching two is enough to qualify.
        has_enough_reports = or_(
            location_reports >= 2,
            agency_reports >= 2,
        )

        statement = (
            select(
                URL.id.label("url_id"),
                URL.full_url.label("url"),
                location_reports.label("missing_location_count"),
                agency_reports.label("missing_agency_count"),
            )
            .join(FlagURLSuspended, FlagURLSuspended.url_id == URL.id)
            .join(UnvalidatedURL, UnvalidatedURL.url_id == URL.id)
            .where(has_enough_reports)
            .order_by(URL.id.asc())
            .limit(self.limit)
        )

        rows: Sequence[RowMapping] = await sh.mappings(
            session=session,
            query=statement,
        )

        # The column labels above match the entry model's field names.
        entries = [MissingAnnotationQueueEntry(**row) for row in rows]
        return MissingAnnotationQueueResponse(entries=entries)
1 change: 1 addition & 0 deletions src/api/endpoints/annotate/missing/post/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Check warning on line 1 in src/api/endpoints/annotate/missing/post/__init__.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/post/__init__.py#L1 <104>

Missing docstring in public package
Raw output
./src/api/endpoints/annotate/missing/post/__init__.py:1:1: D104 Missing docstring in public package

Check warning on line 1 in src/api/endpoints/annotate/missing/post/__init__.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/post/__init__.py#L1 <391>

blank line at end of file
Raw output
./src/api/endpoints/annotate/missing/post/__init__.py:1:1: W391 blank line at end of file
6 changes: 6 additions & 0 deletions src/api/endpoints/annotate/missing/post/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from src.api.shared.models.request_base import RequestBase

Check warning on line 1 in src/api/endpoints/annotate/missing/post/models.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] src/api/endpoints/annotate/missing/post/models.py#L1 <100>

Missing docstring in public module
Raw output
./src/api/endpoints/annotate/missing/post/models.py:1:1: D100 Missing docstring in public module


class ResolveMissingAnnotationRequest(RequestBase):
    """Request payload carrying a location ID and an agency ID.

    NOTE(review): presumably these are the values used to resolve a URL in the
    missing location/agency queue; the handler is not visible here -- confirm.
    Both fields are required.
    """

    location_id: int
    agency_id: int
Loading
Loading