From 25be7a81666e5e1bc184c5c5692b6a51dbb63ad2 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 26 Feb 2026 20:13:48 -0500 Subject: [PATCH 1/2] Remove reviewdog and document lint directives --- .github/workflows/python_checks.yml | 8 ++------ CLAUDE.md | 4 ++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python_checks.yml b/.github/workflows/python_checks.yml index 7f5bef91..5eda0df5 100644 --- a/.github/workflows/python_checks.yml +++ b/.github/workflows/python_checks.yml @@ -14,9 +14,5 @@ jobs: with: python-version: "3.11" - run: pip install flake8 flake8-docstrings flake8-simplify flake8-unused-arguments flake8-annotations - - name: flake8 Lint with Reviewdog - uses: reviewdog/action-flake8@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - flake8_args: --ignore E501,W291,W293,D401,D400,E402,E302,D200,D202,D205,W503,E203,D204,D403 - level: warning + - name: Run flake8 + run: flake8 . --ignore E501,W291,W293,D401,D400,E402,E302,D200,D202,D205,W503,E203,D204,D403 diff --git a/CLAUDE.md b/CLAUDE.md index 23f1547b..1346d8aa 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -113,6 +113,10 @@ See `ENV.md` for the full reference. Key categories: ## Important Notes +- Follow the repository's linting style guide when writing or editing Python: + - Run `flake8` with this exact ignore set in mind: `E501,W291,W293,D401,D400,E402,E302,D200,D202,D205,W503,E203,D204,D403` + - Keep import ordering and blank-line usage compliant with `flake8` defaults aside from the ignore list above + - Prefer concise docstrings where present and avoid introducing new docstring violations outside ignored codes - The app exposes its API docs at `/api` (not the default `/docs` — `/docs` redirects to `/api`). - CORS is configured for `localhost:8888`, `pdap.io`, and `pdap.dev`. - Two permission levels: `access_source_collector` (general) and `source_collector_final_review` (final review). From d4105748fb195bde3f471b2228266e816e5ee2ef Mon Sep 17 00:00:00 2001 From: Max Chis Date: Thu, 26 Feb 2026 20:27:16 -0500 Subject: [PATCH 2/2] Enforce national-only locations for federal agencies --- ...federal_agency_location_integrity_check.py | 81 +++++++++++++++++++ .../scheduled/impl/integrity/queries/cte.py | 3 +- .../federal_agencies_wrong_location.py | 34 ++++++++ .../test_federal_agencies_wrong_location.py | 81 +++++++++++++++++++ 4 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 alembic/versions/2026_02_27_0900-c2f46d1af640_add_federal_agency_location_integrity_check.py create mode 100644 src/db/models/views/integrity/federal_agencies_wrong_location.py create mode 100644 tests/automated/integration/tasks/scheduled/impl/integrity/test_federal_agencies_wrong_location.py diff --git a/alembic/versions/2026_02_27_0900-c2f46d1af640_add_federal_agency_location_integrity_check.py b/alembic/versions/2026_02_27_0900-c2f46d1af640_add_federal_agency_location_integrity_check.py new file mode 100644 index 00000000..90f80e9b --- /dev/null +++ b/alembic/versions/2026_02_27_0900-c2f46d1af640_add_federal_agency_location_integrity_check.py @@ -0,0 +1,81 @@ +"""Add federal agency location integrity check + +Revision ID: c2f46d1af640 +Revises: 1fb2286a016c +Create Date: 2026-02-27 09:00:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = 'c2f46d1af640' +down_revision: Union[str, None] = '1fb2286a016c' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + _create_federal_agencies_wrong_location_view() + _normalize_federal_agency_locations() + + +def _create_federal_agencies_wrong_location_view() -> None: + op.execute(""" + create view integrity__federal_agencies_wrong_location_view as + with national_location as ( + select id + from locations + where type = 'National'::location_type + limit 1 + ) + select + ag.id as agency_id + from agencies ag + inner join link_agencies__locations link on ag.id = link.agency_id + cross join national_location nl + where ag.jurisdiction_type = 'federal' + group by ag.id, nl.id + having array_agg(link.location_id order by link.location_id) != array[nl.id::integer] + """) + + +def _normalize_federal_agency_locations() -> None: + op.execute(""" + with national_location as ( + select id + from locations + where type = 'National'::location_type + limit 1 + ) + delete from link_agencies__locations link + using agencies ag, national_location nl + where link.agency_id = ag.id + and ag.jurisdiction_type = 'federal' + and link.location_id != nl.id::integer + """) + + op.execute(""" + with national_location as ( + select id + from locations + where type = 'National'::location_type + limit 1 + ) + insert into link_agencies__locations (agency_id, location_id) + select + ag.id, + nl.id::integer + from agencies ag + cross join national_location nl + left join link_agencies__locations link + on link.agency_id = ag.id + and link.location_id = nl.id::integer + where ag.jurisdiction_type = 'federal' + and link.agency_id is null + """) + + +def downgrade() -> None: + op.execute("drop view if exists integrity__federal_agencies_wrong_location_view") diff --git a/src/core/tasks/scheduled/impl/integrity/queries/cte.py b/src/core/tasks/scheduled/impl/integrity/queries/cte.py index dc894ea7..41b3e288 100644 --- a/src/core/tasks/scheduled/impl/integrity/queries/cte.py +++ b/src/core/tasks/scheduled/impl/integrity/queries/cte.py @@ -1,6 +1,7 @@ from sqlalchemy import select, literal, Exists, Label, or_ from src.db.models.templates_.base import Base +from src.db.models.views.integrity.federal_agencies_wrong_location import IntegrityFederalAgenciesWrongLocation from src.db.models.views.integrity.incomplete_data_sources import IntegrityIncompleteDataSource from src.db.models.views.integrity.incomplete_meta_urls import IntegrityIncompleteMetaURL from src.db.models.views.integrity.non_federal_agencies_no_location import IntegrityNonFederalAgenciesNoLocation @@ -27,6 +28,7 @@ def __init__( ): self.models: list[type[Base]] = [ IntegrityURLBothDataSourceAndMetaURL, + IntegrityFederalAgenciesWrongLocation, IntegrityNonFederalAgenciesNoLocation, IntegrityIncompleteMetaURL, IntegrityIncompleteDataSource, @@ -58,4 +60,3 @@ def any_rows_exist_query(self) -> select: @property def select_all_columns_query(self) -> select: return select(self.cte) - diff --git a/src/db/models/views/integrity/federal_agencies_wrong_location.py b/src/db/models/views/integrity/federal_agencies_wrong_location.py new file mode 100644 index 00000000..4a644d55 --- /dev/null +++ b/src/db/models/views/integrity/federal_agencies_wrong_location.py @@ -0,0 +1,34 @@ +""" + create view integrity__federal_agencies_wrong_location_view as + with national_location as ( + select id + from locations + where type = 'National' + limit 1 + ) + select + ag.id as agency_id + from agencies ag + inner join link_agencies__locations link on ag.id = link.agency_id + cross join national_location nl + where ag.jurisdiction_type = 'federal' + group by ag.id, nl.id + having array_agg(link.location_id order by link.location_id) != array[nl.id::integer] +""" +from sqlalchemy import PrimaryKeyConstraint + +from src.db.models.helpers import VIEW_ARG +from src.db.models.mixins import AgencyDependentMixin, ViewMixin +from src.db.models.templates_.base import Base + + +class IntegrityFederalAgenciesWrongLocation( + Base, + ViewMixin, + AgencyDependentMixin +): + __tablename__ = "integrity__federal_agencies_wrong_location_view" + __table_args__ = ( + PrimaryKeyConstraint("agency_id"), + VIEW_ARG, + ) diff --git a/tests/automated/integration/tasks/scheduled/impl/integrity/test_federal_agencies_wrong_location.py b/tests/automated/integration/tasks/scheduled/impl/integrity/test_federal_agencies_wrong_location.py new file mode 100644 index 00000000..486fce4a --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/integrity/test_federal_agencies_wrong_location.py @@ -0,0 +1,81 @@ +import pytest +from sqlalchemy import delete, select + +from src.core.tasks.scheduled.impl.integrity.operator import IntegrityMonitorTaskOperator +from src.db import Location +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.impl.location.location.enums import LocationType +from tests.automated.integration.tasks.scheduled.impl.integrity.helpers import run_task_and_confirm_error +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo + + +@pytest.mark.asyncio +async def test_core( + operator: IntegrityMonitorTaskOperator, + pittsburgh_locality: LocalityCreationInfo +): + # Check does not meet prerequisites + assert not await operator.meets_task_prerequisites() + + # Add federal agency + agency = Agency( + name="Federal Agency", + agency_type=AgencyType.COURT, + jurisdiction_type=JurisdictionType.FEDERAL + ) + agency_id: int = await operator.adb_client.add(agency, return_id=True) + + # Add US national location link + national_location_id = await operator.adb_client.scalar( + select( + Location.id + ).where( + Location.type == LocationType.NATIONAL + ) + ) + if national_location_id is None: + national_location_id = await operator.adb_client.add( + Location(type=LocationType.NATIONAL), + return_id=True + ) + + national_link = LinkAgencyLocation( + agency_id=agency_id, + location_id=national_location_id + ) + await operator.adb_client.add(national_link) + + # Check still does not meet prerequisites + assert not await operator.meets_task_prerequisites() + + # Add non-national location link for same federal agency + bad_link = LinkAgencyLocation( + agency_id=agency_id, + location_id=pittsburgh_locality.location_id + ) + await operator.adb_client.add(bad_link) + + # Check meets prerequisites + assert await operator.meets_task_prerequisites() + + # Run task and confirm produces error + await run_task_and_confirm_error( + operator=operator, + expected_view="integrity__federal_agencies_wrong_location_view" + ) + + # Remove non-national link + statement = ( + delete( + LinkAgencyLocation + ).where( + LinkAgencyLocation.agency_id == agency_id, + LinkAgencyLocation.location_id == pittsburgh_locality.location_id + ) + ) + await operator.adb_client.execute(statement) + + # Check no longer meets task prerequisites + assert not await operator.meets_task_prerequisites()