From 804f1455e4905d836aec41ab2158d02e7a2ddaf1 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 26 Feb 2026 21:04:12 -0500 Subject: [PATCH] Add URL health view and expose health on data sources --- ...1b9d44_add_url_health_materialized_view.py | 79 +++++++++++++++++++ .../endpoints/data_source/_shared/build.py | 20 ++++- .../endpoints/data_source/_shared/process.py | 24 +++++- src/api/endpoints/data_source/get/response.py | 15 ++++ .../impl/data_sources/add/queries/get.py | 30 +++---- .../impl/data_sources/update/queries/cte.py | 35 +++++++- .../impl/data_sources/update/queries/get.py | 26 +++--- .../impl/sync_to_ds/shared/convert.py | 16 +++- .../materialized_views/url_health/__init__.py | 1 + .../materialized_views/url_health/enums.py | 7 ++ .../url_health/sqlalchemy.py | 22 ++++++ .../readonly/api/data_sources/test_get.py | 16 +++- .../impl/sync_to_ds/data_source/test_add.py | 3 + .../data_source/update/test_update_url.py | 4 +- 14 files changed, 254 insertions(+), 44 deletions(-) create mode 100644 alembic/versions/2026_02_27_1200-7a6c2e1b9d44_add_url_health_materialized_view.py create mode 100644 src/db/models/materialized_views/url_health/__init__.py create mode 100644 src/db/models/materialized_views/url_health/enums.py create mode 100644 src/db/models/materialized_views/url_health/sqlalchemy.py diff --git a/alembic/versions/2026_02_27_1200-7a6c2e1b9d44_add_url_health_materialized_view.py b/alembic/versions/2026_02_27_1200-7a6c2e1b9d44_add_url_health_materialized_view.py new file mode 100644 index 00000000..5b1400b7 --- /dev/null +++ b/alembic/versions/2026_02_27_1200-7a6c2e1b9d44_add_url_health_materialized_view.py @@ -0,0 +1,79 @@ +"""Add url health view + +Revision ID: 7a6c2e1b9d44 +Revises: 1fb2286a016c +Create Date: 2026-02-27 12:00:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "7a6c2e1b9d44" +down_revision: Union[str, None] = "1fb2286a016c" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _create_url_health_view() -> None: + op.execute( + """ + CREATE VIEW url_health_view AS + WITH latest_redirect AS ( + SELECT DISTINCT ON (lur.source_url_id) + lur.source_url_id, + lur.destination_url_id + FROM link_urls_redirect_url lur + ORDER BY lur.source_url_id, lur.updated_at DESC, lur.created_at DESC + ) + SELECT + u.id AS url_id, + CASE + WHEN uwm.status_code = 200 THEN 'OK' + WHEN lr.destination_url_id IS NOT NULL AND redirect_uwm.status_code = 200 THEN 'OK' + WHEN uiapm.archive_url IS NOT NULL THEN 'Archived' + ELSE 'Broken' + END AS health, + CASE + WHEN uwm.status_code = 200 THEN 100 + WHEN lr.destination_url_id IS NOT NULL AND redirect_uwm.status_code = 200 THEN 150 + WHEN uiapm.archive_url IS NOT NULL THEN 200 + ELSE 300 + END AS code, + uwm.status_code, + lr.destination_url_id AS redirect_url_id, + CASE + WHEN redirect_u.scheme IS NOT NULL AND redirect_u.trailing_slash = TRUE + THEN redirect_u.scheme || '://' || redirect_u.url || '/' + WHEN redirect_u.scheme IS NOT NULL AND redirect_u.trailing_slash = FALSE + THEN redirect_u.scheme || '://' || redirect_u.url + ELSE redirect_u.url + END AS redirect_url, + redirect_uwm.status_code AS redirect_status_code, + (lr.destination_url_id IS NOT NULL) AS has_redirect, + (lr.destination_url_id IS NOT NULL AND redirect_uwm.status_code = 200) AS redirect_is_healthy, + (uiapm.archive_url IS NOT NULL) AS has_archive, + uiapm.archive_url + FROM urls u + LEFT JOIN url_web_metadata uwm + ON uwm.url_id = u.id + LEFT JOIN latest_redirect lr + ON lr.source_url_id = u.id + LEFT JOIN urls redirect_u + ON redirect_u.id = lr.destination_url_id + LEFT JOIN url_web_metadata redirect_uwm + ON redirect_uwm.url_id = lr.destination_url_id + LEFT JOIN url_internet_archives_probe_metadata uiapm + ON uiapm.url_id = u.id + """ + ) + + +def upgrade() -> None: + _create_url_health_view() + + +def downgrade() -> None: + op.execute("DROP VIEW IF EXISTS url_health_view") diff --git a/src/api/endpoints/data_source/_shared/build.py b/src/api/endpoints/data_source/_shared/build.py index 35b65343..83032e91 100644 --- a/src/api/endpoints/data_source/_shared/build.py +++ b/src/api/endpoints/data_source/_shared/build.py @@ -7,6 +7,7 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.models.materialized_views.url_health.sqlalchemy import URLHealthMaterializedView def build_data_source_get_query() -> Select: @@ -39,7 +40,18 @@ def build_data_source_get_query() -> Select: URLOptionalDataSourceMetadata.scraper_url, URLOptionalDataSourceMetadata.submission_notes, URLOptionalDataSourceMetadata.access_notes, - URLOptionalDataSourceMetadata.access_types + URLOptionalDataSourceMetadata.access_types, + + URLHealthMaterializedView.health, + URLHealthMaterializedView.code, + URLHealthMaterializedView.status_code, + URLHealthMaterializedView.redirect_url_id, + URLHealthMaterializedView.redirect_url, + URLHealthMaterializedView.redirect_status_code, + URLHealthMaterializedView.has_redirect, + URLHealthMaterializedView.redirect_is_healthy, + URLHealthMaterializedView.has_archive, + URLHealthMaterializedView.archive_url, ) .join( URLRecordType, @@ -60,7 +72,11 @@ def build_data_source_get_query() -> Select: URLOptionalDataSourceMetadata, URLOptionalDataSourceMetadata.url_id == URL.id ) + .outerjoin( + URLHealthMaterializedView, + URLHealthMaterializedView.url_id == URL.id, + ) .options( selectinload(URL.confirmed_agencies), ) - ) \ No newline at end of file + ) diff --git a/src/api/endpoints/data_source/_shared/process.py b/src/api/endpoints/data_source/_shared/process.py index 252ed7c0..e755fdd9 100644 --- a/src/api/endpoints/data_source/_shared/process.py +++ b/src/api/endpoints/data_source/_shared/process.py @@ -1,10 +1,11 @@ from sqlalchemy import RowMapping -from src.api.endpoints.data_source.get.response import DataSourceGetResponse +from src.api.endpoints.data_source.get.response import DataSourceGetResponse, DataSourceURLHealthResponse from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.models.materialized_views.url_health.sqlalchemy import URLHealthMaterializedView def process_data_source_get_mapping( @@ -16,6 +17,22 @@ def process_data_source_get_mapping( for agency in url.confirmed_agencies: url_agency_ids.append(agency.id) + url_health: DataSourceURLHealthResponse | None = None + health_value = mapping[URLHealthMaterializedView.health] + if health_value is not None: + url_health = DataSourceURLHealthResponse( + value=health_value, + code=mapping[URLHealthMaterializedView.code], + http_status_code=mapping[URLHealthMaterializedView.status_code], + redirect_url_id=mapping[URLHealthMaterializedView.redirect_url_id], + redirect_url=mapping[URLHealthMaterializedView.redirect_url], + redirect_http_status_code=mapping[URLHealthMaterializedView.redirect_status_code], + has_redirect=mapping[URLHealthMaterializedView.has_redirect] or False, + redirect_is_healthy=mapping[URLHealthMaterializedView.redirect_is_healthy] or False, + has_archive=mapping[URLHealthMaterializedView.has_archive] or False, + archive_url=mapping[URLHealthMaterializedView.archive_url] or None, + ) + return DataSourceGetResponse( url_id=mapping[URL.id], url=mapping[URL.url], @@ -40,5 +57,6 @@ def process_data_source_get_mapping( scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url], submission_notes=mapping[URLOptionalDataSourceMetadata.submission_notes], access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], - access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [] - ) \ No newline at end of file + access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], + url_health=url_health, + ) diff --git a/src/api/endpoints/data_source/get/response.py b/src/api/endpoints/data_source/get/response.py index b80ee9e1..782cd51b 100644 --- a/src/api/endpoints/data_source/get/response.py +++ b/src/api/endpoints/data_source/get/response.py @@ -5,6 +5,20 @@ from src.core.enums import RecordType from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, UpdateMethodEnum, \ RetentionScheduleEnum, AccessTypeEnum +from src.db.models.materialized_views.url_health.enums import URLHealthViewEnum + + +class DataSourceURLHealthResponse(BaseModel): + value: URLHealthViewEnum + code: int + http_status_code: int | None = None + redirect_url_id: int | None = None + redirect_url: str | None = None + redirect_http_status_code: int | None = None + has_redirect: bool = False + redirect_is_healthy: bool = False + has_archive: bool = False + archive_url: str | None = None class DataSourceGetResponse(BaseModel): @@ -38,6 +52,7 @@ class DataSourceGetResponse(BaseModel): submission_notes: str | None = None access_notes: str | None = None access_types: list[AccessTypeEnum] + url_health: DataSourceURLHealthResponse | None = None class DataSourceGetOuterResponse(BaseModel): results: list[DataSourceGetResponse] diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py index 487850dd..7dcf9f18 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py @@ -6,16 +6,13 @@ from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.queries.cte import \ DSAppLinkSyncDataSourceAddPrerequisitesCTEContainer -from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status +from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_health_to_ds_url_status from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata -from src.db.models.materialized_views.url_status.sqlalchemy import URLStatusMaterializedView +from src.db.models.materialized_views.url_health.sqlalchemy import URLHealthMaterializedView from src.db.queries.base.builder import QueryBuilderBase -from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest, AddDataSourcesInnerRequest @@ -42,7 +39,6 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: # Required URL.full_url, URL.name, - URLWebMetadata.status_code, URLRecordType.record_type, agency_id_cte.c.agency_ids, # Optional @@ -61,7 +57,9 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: URLOptionalDataSourceMetadata.scraper_url, URLOptionalDataSourceMetadata.access_notes, URLOptionalDataSourceMetadata.access_types, - URLInternetArchivesProbeMetadata.archive_url, + URLHealthMaterializedView.health, + URLHealthMaterializedView.has_archive, + URLHealthMaterializedView.archive_url, ) .select_from( cte.cte @@ -75,12 +73,8 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: URL.id == URLOptionalDataSourceMetadata.url_id, ) .outerjoin( - URLWebMetadata, - URL.id == URLWebMetadata.url_id - ) - .outerjoin( - URLInternetArchivesProbeMetadata, - URL.id == URLInternetArchivesProbeMetadata.url_id, + URLHealthMaterializedView, + URL.id == URLHealthMaterializedView.url_id, ) .join( URLRecordType, @@ -124,15 +118,15 @@ async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url], access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], - # TODO: Change to convert web metadata result to URL Status - url_status=convert_sm_url_status_to_ds_url_status( - mapping[URLWebMetadata.status_code], + url_status=convert_sm_url_health_to_ds_url_status( + health=mapping[URLHealthMaterializedView.health], + has_archive=mapping[URLHealthMaterializedView.has_archive] or False, ), - internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, + internet_archives_url=mapping[URLHealthMaterializedView.archive_url] or None, ) ) ) return AddDataSourcesOuterRequest( data_sources=inner_requests, - ) \ No newline at end of file + ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py index b1c21474..ffc370ad 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py @@ -1,9 +1,11 @@ -from sqlalchemy import select, or_, Column, CTE +from sqlalchemy import select, or_, Column, CTE, and_, exists +from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata class DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer: @@ -32,6 +34,35 @@ def __init__(self): URLOptionalDataSourceMetadata.updated_at > DSAppLinkDataSource.last_synced_at, URLRecordType.created_at > DSAppLinkDataSource.last_synced_at, URLRecordType.updated_at > DSAppLinkDataSource.last_synced_at, + exists( + select(URLWebMetadata.url_id).where( + and_( + URLWebMetadata.url_id == DSAppLinkDataSource.url_id, + URLWebMetadata.updated_at > DSAppLinkDataSource.last_synced_at, + ) + ) + ), + exists( + select(LinkURLRedirectURL.source_url_id).where( + and_( + LinkURLRedirectURL.source_url_id == DSAppLinkDataSource.url_id, + LinkURLRedirectURL.updated_at > DSAppLinkDataSource.last_synced_at, + ) + ) + ), + exists( + select(LinkURLRedirectURL.source_url_id) + .join( + URLWebMetadata, + URLWebMetadata.url_id == LinkURLRedirectURL.destination_url_id, + ) + .where( + and_( + LinkURLRedirectURL.source_url_id == DSAppLinkDataSource.url_id, + URLWebMetadata.updated_at > DSAppLinkDataSource.last_synced_at, + ) + ) + ), ) ).cte("ds_app_link_sync_data_source_update_prerequisites") ) @@ -46,4 +77,4 @@ def ds_data_source_id(self) -> Column[int]: @property def cte(self) -> CTE: - return self._cte \ No newline at end of file + return self._cte diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py index 8b23f339..c0cb1c12 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py @@ -6,15 +6,13 @@ from src.core.tasks.scheduled.impl.sync_to_ds.constants import PER_REQUEST_ENTITY_LIMIT from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.cte import \ DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer -from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_status_to_ds_url_status +from src.core.tasks.scheduled.impl.sync_to_ds.shared.convert import convert_sm_url_health_to_ds_url_status from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType -from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from src.db.models.materialized_views.url_health.sqlalchemy import URLHealthMaterializedView from src.db.queries.base.builder import QueryBuilderBase -from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesOuterRequest, \ UpdateDataSourcesInnerRequest @@ -42,7 +40,6 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: # Required URL.full_url, URL.name, - URLWebMetadata.status_code, URLRecordType.record_type, agency_id_cte.c.agency_ids, # Optional @@ -62,7 +59,9 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: URLOptionalDataSourceMetadata.access_notes, URLOptionalDataSourceMetadata.access_types, URLOptionalDataSourceMetadata.data_portal_type_other, - URLInternetArchivesProbeMetadata.archive_url, + URLHealthMaterializedView.health, + URLHealthMaterializedView.has_archive, + URLHealthMaterializedView.archive_url, ) .select_from( cte.cte @@ -76,17 +75,13 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: URL.id == URLOptionalDataSourceMetadata.url_id, ) .outerjoin( - URLInternetArchivesProbeMetadata, - URL.id == URLInternetArchivesProbeMetadata.url_id, + URLHealthMaterializedView, + URLHealthMaterializedView.url_id == URL.id, ) .join( URLRecordType, URLRecordType.url_id == URL.id, ) - .outerjoin( - URLWebMetadata, - URLWebMetadata.url_id == URL.id, - ) .outerjoin( agency_id_cte, cte.url_id == agency_id_cte.c.url_id @@ -126,10 +121,11 @@ async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [], data_portal_type_other=mapping[URLOptionalDataSourceMetadata.data_portal_type_other], - url_status=convert_sm_url_status_to_ds_url_status( - mapping[URLWebMetadata.status_code], + url_status=convert_sm_url_health_to_ds_url_status( + health=mapping[URLHealthMaterializedView.health], + has_archive=mapping[URLHealthMaterializedView.has_archive] or False, ), - internet_archives_url=mapping[URLInternetArchivesProbeMetadata.archive_url] or None, + internet_archives_url=mapping[URLHealthMaterializedView.archive_url] or None, ) ) ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py index 3de3e502..806ada00 100644 --- a/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py +++ b/src/core/tasks/scheduled/impl/sync_to_ds/shared/convert.py @@ -1,11 +1,23 @@ from src.external.pdap.enums import DataSourcesURLStatus +from src.db.models.materialized_views.url_health.enums import URLHealthViewEnum def convert_sm_url_status_to_ds_url_status( - status_code: int + status_code: int | None ) -> DataSourcesURLStatus: match status_code: case 200: return DataSourcesURLStatus.OK case _: - return DataSourcesURLStatus.BROKEN \ No newline at end of file + return DataSourcesURLStatus.BROKEN + + +def convert_sm_url_health_to_ds_url_status( + health: URLHealthViewEnum | str | None, + has_archive: bool, +) -> DataSourcesURLStatus: + if health == URLHealthViewEnum.OK.value: + return DataSourcesURLStatus.OK + if has_archive: + return DataSourcesURLStatus.AVAILABLE + return DataSourcesURLStatus.BROKEN diff --git a/src/db/models/materialized_views/url_health/__init__.py b/src/db/models/materialized_views/url_health/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/db/models/materialized_views/url_health/__init__.py @@ -0,0 +1 @@ + diff --git a/src/db/models/materialized_views/url_health/enums.py b/src/db/models/materialized_views/url_health/enums.py new file mode 100644 index 00000000..c14403cf --- /dev/null +++ b/src/db/models/materialized_views/url_health/enums.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class URLHealthViewEnum(Enum): + OK = "OK" + ARCHIVED = "Archived" + BROKEN = "Broken" diff --git a/src/db/models/materialized_views/url_health/sqlalchemy.py b/src/db/models/materialized_views/url_health/sqlalchemy.py new file mode 100644 index 00000000..5b84b159 --- /dev/null +++ b/src/db/models/materialized_views/url_health/sqlalchemy.py @@ -0,0 +1,22 @@ +from sqlalchemy.orm import Mapped + +from src.db.models.mixins import URLDependentViewMixin +from src.db.models.templates_.base import Base + + +class URLHealthMaterializedView( + Base, + URLDependentViewMixin +): + __tablename__ = "url_health_view" + + health: Mapped[str] + code: Mapped[int] + status_code: Mapped[int | None] + redirect_url_id: Mapped[int | None] + redirect_url: Mapped[str | None] + redirect_status_code: Mapped[int | None] + has_redirect: Mapped[bool] + redirect_is_healthy: Mapped[bool] + has_archive: Mapped[bool] + archive_url: Mapped[str | None] diff --git a/tests/automated/integration/readonly/api/data_sources/test_get.py b/tests/automated/integration/readonly/api/data_sources/test_get.py index c23d2177..1d036e09 100644 --- a/tests/automated/integration/readonly/api/data_sources/test_get.py +++ b/tests/automated/integration/readonly/api/data_sources/test_get.py @@ -3,10 +3,12 @@ import pytest from deepdiff import DeepDiff -from src.api.endpoints.data_source.get.response import DataSourceGetOuterResponse, DataSourceGetResponse +from src.api.endpoints.data_source.get.response import DataSourceGetOuterResponse, DataSourceGetResponse, \ + DataSourceURLHealthResponse from src.core.enums import RecordType from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, UpdateMethodEnum, \ RetentionScheduleEnum, AccessTypeEnum +from src.db.models.materialized_views.url_health.enums import URLHealthViewEnum from tests.automated.integration.readonly.helper import ReadOnlyTestHelper @@ -51,6 +53,18 @@ async def test_get(readonly_helper: ReadOnlyTestHelper): submission_notes="Read Only Submission Notes", access_notes="Read Only Access Notes", access_types=[AccessTypeEnum.WEBPAGE, AccessTypeEnum.API], + url_health=DataSourceURLHealthResponse( + value=URLHealthViewEnum.BROKEN, + code=300, + http_status_code=None, + redirect_url_id=None, + redirect_url=None, + redirect_http_status_code=None, + has_redirect=False, + redirect_is_healthy=False, + has_archive=False, + archive_url=None, + ), ).model_dump(mode='json'), ) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py index 2e57e042..66869e72 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py @@ -5,6 +5,7 @@ from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.external.pdap.client import PDAPClient +from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest, AddDataSourcesInnerRequest from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel, \ @@ -78,6 +79,8 @@ async def test_add( assert content.access_notes is None assert content.access_types == [] assert content.data_portal_type_other is None + assert content.url_status == DataSourcesURLStatus.BROKEN + assert content.internet_archives_url is None assert content.agency_ids == [test_agency_id] diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_url.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_url.py index 78c095c0..a98cb29c 100644 --- a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_url.py +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_url.py @@ -7,6 +7,7 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.external.pdap.client import PDAPClient +from src.external.pdap.enums import DataSourcesURLStatus from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesInnerRequest, \ UpdateDataSourcesOuterRequest @@ -72,10 +73,11 @@ async def test_update_url( ] assert content.source_url == "http://modified-example.com/" assert content.description == "Updated URL Description" + assert content.url_status == DataSourcesURLStatus.BROKEN + assert content.internet_archives_url is None # Check DS App Link Is Updated ds_app_link: DSAppLinkDataSource | None = await adb_client_test.one_or_none_model(model=DSAppLinkDataSource) assert ds_app_link is not None assert ds_app_link.ds_data_source_id == 67 assert ds_app_link.last_synced_at > ds_app_linked_data_source_url.updated_at -