From a026cc1fbe9bb60c1fd3c6323629a38868bd95c8 Mon Sep 17 00:00:00 2001
From: Ricardo Boni <rboni@datakitchen.io>
Date: Fri, 1 May 2026 13:16:39 -0400
Subject: [PATCH 01/11] =?UTF-8?q?feat(mcp):=20hygiene=20issues=20=E2=80=94?=
 =?UTF-8?q?=20list,=20get,=20search,=20update=20+=20resource=20+=20prompt?=
 =?UTF-8?q?=20(TG-1029)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Expose profiling anomalies for read, search, and triage. Adds:

* `list_hygiene_issues` — paginated list per profiling run / table group
* `get_hygiene_issue` — full detail with type definition + column profile
* `search_hygiene_issues` — cross-run search with project scoping + `since`
* `update_hygiene_issue` — disposition update, gated by `disposition` permission
* `testgen://hygiene-issue-types` — reference table resource
* `hygiene_triage` — guided workflow prompt

Likelihood split into `issue_likelihood` + `pii_risk` so PII discovery doesn't
muddle regular-issue likelihood values; providing one auto-excludes the other
category via the natural query semantics.

User-facing terminology applied: `Quality Dimension`, `Muted` (for `Inactive`).
Run timestamp sourced from `JobExecution.started_at` per the run/JE consolidation
direction. No "anomaly" in user-facing surfaces — DB columns keep their names.
---
 testgen/common/models/hygiene_issue.py | 265 +++++++++++++++-
 testgen/mcp/prompts/workflows.py       |  35 +++
 testgen/mcp/server.py                  |  25 +-
 testgen/mcp/tools/common.py            |  59 ++++
 testgen/mcp/tools/hygiene_issues.py    | 405 +++++++++++++++++++++++++
 testgen/mcp/tools/reference.py         |  22 ++
 testgen/mcp/tools/test_definitions.py  |   6 +-
 7 files changed, 809 insertions(+), 8 deletions(-)
 create mode 100644 testgen/mcp/tools/hygiene_issues.py

diff --git a/testgen/common/models/hygiene_issue.py b/testgen/common/models/hygiene_issue.py
index caa4d460..a950c5a9 100644
--- a/testgen/common/models/hygiene_issue.py
+++ b/testgen/common/models/hygiene_issue.py
@@ -1,10 +1,11 @@
 import re
 from collections.abc import Iterable
 from dataclasses import dataclass
+from datetime import datetime
 from typing import Self
 from uuid import UUID, uuid4
 
-from sqlalchemy import Column, ForeignKey, String, and_, case, null, select
+from sqlalchemy import Boolean, Column, ForeignKey, String, and_, case, null, select, update
 from sqlalchemy.dialects import postgresql
 from sqlalchemy.ext.hybrid import hybrid_property
 from sqlalchemy.orm import aliased, relationship
@@ -12,6 +13,10 @@
 
 from testgen.common.models import Base, get_current_session
 from testgen.common.models.entity import Entity
+from testgen.common.models.job_execution import JobExecution
+from testgen.common.models.profile_result import ProfileResult
+from testgen.common.models.profiling_run import ProfilingRun
+from testgen.common.models.table_group import TableGroup
 
 PII_RISK_RE = re.compile(r"Risk: (MODERATE|HIGH),")
 
@@ -36,14 +41,96 @@ def active(self):
         return self.total - self.inactive
 
 
+@dataclass
+class HygieneIssueListRow:
+    """Row shape for ``list_hygiene_issues``."""
+
+    id: UUID
+    project_code: str
+    issue_type_name: str
+    schema_name: str
+    table_name: str
+    column_name: str
+    dq_dimension: str | None
+    disposition: str
+    priority: str | None
+    detail: str
+    detail_redactable: bool | None
+    pii_flag: str | None
+
+
+@dataclass
+class HygieneIssueSearchRow:
+    """Row shape for ``search_hygiene_issues``. Adds run + table-group context vs the list row."""
+
+    id: UUID
+    project_code: str
+    issue_type_name: str
+    table_groups_name: str
+    job_execution_id: UUID | None
+    started_at: datetime | None
+    schema_name: str
+    table_name: str
+    column_name: str
+    dq_dimension: str | None
+    disposition: str
+    priority: str | None
+    detail: str
+    detail_redactable: bool | None
+    pii_flag: str | None
+
+
+@dataclass
+class HygieneIssueDetail:
+    """Full row + type definition + column-profile context for ``get_hygiene_issue``."""
+
+    id: UUID
+    project_code: str
+    issue_type_name: str
+    type_description: str | None
+    suggested_action: str | None
+    schema_name: str
+    table_name: str
+    column_name: str
+    db_data_type: str | None
+    dq_dimension: str | None
+    impact_dimension: str | None
+    disposition: str
+    priority: str | None
+    detail: str
+    detail_redactable: bool | None
+    pii_flag: str | None
+    job_execution_id: UUID | None
+    started_at: datetime | None
+    column_general_type: str | None
+    column_db_data_type: str | None
+    column_record_ct: int | None
+    column_null_value_ct: int | None
+    column_distinct_value_ct: int | None
+
+
 class HygieneIssueType(Base):
     __tablename__ = "profile_anomaly_types"
 
     id: str = Column(String, primary_key=True)
     likelihood: str = Column("issue_likelihood", String)
     name: str = Column("anomaly_name", String)
+    description: str = Column("anomaly_description", String)
+    suggested_action: str = Column(String)
+    dq_dimension: str = Column(String)
+    impact_dimension: str = Column(String)
+    data_object: str = Column(String)
+    detail_redactable: bool = Column(Boolean)
+
+    # Unmapped: anomaly_type, anomaly_criteria, detail_expression,
+    # dq_score_prevalence_formula, dq_score_risk_factor.
 
-    # Note: not all table columns are implemented by this entity
+    @classmethod
+    def select_where(cls, *clauses, order_by=None) -> list[Self]:
+        query = select(cls).where(*clauses)
+        if order_by is not None:
+            query = query.order_by(*order_by)
+        return list(get_current_session().scalars(query))
 
 
 class HygieneIssue(Entity):
@@ -58,14 +145,18 @@ class HygieneIssue(Entity):
     type_id: str = Column("anomaly_id", String, ForeignKey("profile_anomaly_types.id"), nullable=False)
     type_ = relationship(HygieneIssueType)
 
+    column_id: UUID = Column(postgresql.UUID(as_uuid=True))
+
     schema_name: str = Column(String, nullable=False)
     table_name: str = Column(String, nullable=False)
     column_name: str = Column(String, nullable=False)
+    db_data_type: str = Column(String)
 
     detail: str = Column(String, nullable=False)
     disposition: str = Column(String)
+    impact_dimension: str = Column(String)
 
-    # Note: not all table columns are implemented by this entity
+    # Unmapped: column_type, dq_prevalence.
 
     @hybrid_property
     def priority(self):
@@ -132,6 +223,174 @@ def _count_active(likelihood_values: tuple[str, ...]):
         row = get_current_session().execute(query).first()
         return IssueLikelihoodCounts(**{k: v for k, v in row._mapping.items() if v is not None})
 
+    @classmethod
+    def _priority_order(cls):
+        return case(
+            (cls.priority == "Definite", 1),
+            (cls.priority == "Likely", 2),
+            (cls.priority == "Possible", 3),
+            (cls.priority == "High", 4),
+            (cls.priority == "Moderate", 5),
+            else_=6,
+        )
+
+    @classmethod
+    def list_for_run(
+        cls,
+        profile_run_id: UUID,
+        *clauses,
+        page: int = 1,
+        limit: int = 50,
+    ) -> tuple[list[HygieneIssueListRow], int]:
+        """Paginated hygiene issues for a single profiling run.
+
+        Caller-supplied ``*clauses`` carry every WHERE filter (project scoping, disposition,
+        likelihood / pii_risk, table / column / dq_dimension / issue_type filters).
+        """
+        query = (
+            select(
+                cls.id.label("id"),
+                cls.project_code.label("project_code"),
+                HygieneIssueType.name.label("issue_type_name"),
+                cls.schema_name.label("schema_name"),
+                cls.table_name.label("table_name"),
+                cls.column_name.label("column_name"),
+                HygieneIssueType.dq_dimension.label("dq_dimension"),
+                func.coalesce(cls.disposition, "Confirmed").label("disposition"),
+                cls.priority.label("priority"),
+                cls.detail.label("detail"),
+                HygieneIssueType.detail_redactable.label("detail_redactable"),
+                ProfileResult.pii_flag.label("pii_flag"),
+            )
+            .join(HygieneIssueType, HygieneIssueType.id == cls.type_id)
+            .outerjoin(
+                ProfileResult,
+                and_(
+                    ProfileResult.profile_run_id == cls.profile_run_id,
+                    ProfileResult.schema_name == cls.schema_name,
+                    ProfileResult.table_name == cls.table_name,
+                    ProfileResult.column_name == cls.column_name,
+                ),
+            )
+            .where(cls.profile_run_id == profile_run_id, *clauses)
+            .order_by(cls._priority_order(), cls.table_name, cls.column_name, cls.id)
+        )
+        return cls._paginate(query, page=page, limit=limit, data_class=HygieneIssueListRow)
+
+    @classmethod
+    def search(
+        cls,
+        *clauses,
+        page: int = 1,
+        limit: int = 50,
+    ) -> tuple[list[HygieneIssueSearchRow], int]:
+        """Cross-run paginated search over hygiene issues.
+
+        Always JOINs ``ProfilingRun`` + ``JobExecution`` (for ``started_at`` + ``job_execution_id``)
+        and ``TableGroup`` (for ``table_groups_name``). Caller-supplied ``*clauses`` carry every
+        WHERE filter (project scoping, ``JobExecution.started_at`` window, all user filters).
+        """
+        query = (
+            select(
+                cls.id.label("id"),
+                cls.project_code.label("project_code"),
+                HygieneIssueType.name.label("issue_type_name"),
+                TableGroup.table_groups_name.label("table_groups_name"),
+                ProfilingRun.job_execution_id.label("job_execution_id"),
+                JobExecution.started_at.label("started_at"),
+                cls.schema_name.label("schema_name"),
+                cls.table_name.label("table_name"),
+                cls.column_name.label("column_name"),
+                HygieneIssueType.dq_dimension.label("dq_dimension"),
+                func.coalesce(cls.disposition, "Confirmed").label("disposition"),
+                cls.priority.label("priority"),
+                cls.detail.label("detail"),
+                HygieneIssueType.detail_redactable.label("detail_redactable"),
+                ProfileResult.pii_flag.label("pii_flag"),
+            )
+            .join(HygieneIssueType, HygieneIssueType.id == cls.type_id)
+            .join(ProfilingRun, ProfilingRun.id == cls.profile_run_id)
+            .outerjoin(JobExecution, JobExecution.id == ProfilingRun.job_execution_id)
+            .join(TableGroup, TableGroup.id == cls.table_groups_id)
+            .outerjoin(
+                ProfileResult,
+                and_(
+                    ProfileResult.profile_run_id == cls.profile_run_id,
+                    ProfileResult.schema_name == cls.schema_name,
+                    ProfileResult.table_name == cls.table_name,
+                    ProfileResult.column_name == cls.column_name,
+                ),
+            )
+            .where(*clauses)
+            .order_by(JobExecution.started_at.desc(), cls._priority_order(), cls.id)
+        )
+        return cls._paginate(query, page=page, limit=limit, data_class=HygieneIssueSearchRow)
+
+    @classmethod
+    def get_with_context(cls, issue_id: UUID, *clauses) -> HygieneIssueDetail | None:
+        """Fetch one hygiene issue with type definition + column-profile context.
+
+        Returns ``None`` when no row matches the id and ``*clauses`` together — the
+        caller decides whether that's "missing", "not accessible", or both collapsed
+        into one error.
+
+        Joins ``ProfileResult`` outer-style: table-level issues may have no matching
+        column profile row, in which case the column_* fields stay ``None``.
+        """
+        query = (
+            select(
+                cls.id.label("id"),
+                cls.project_code.label("project_code"),
+                HygieneIssueType.name.label("issue_type_name"),
+                HygieneIssueType.description.label("type_description"),
+                HygieneIssueType.suggested_action.label("suggested_action"),
+                cls.schema_name.label("schema_name"),
+                cls.table_name.label("table_name"),
+                cls.column_name.label("column_name"),
+                cls.db_data_type.label("db_data_type"),
+                HygieneIssueType.dq_dimension.label("dq_dimension"),
+                cls.impact_dimension.label("impact_dimension"),
+                func.coalesce(cls.disposition, "Confirmed").label("disposition"),
+                cls.priority.label("priority"),
+                cls.detail.label("detail"),
+                HygieneIssueType.detail_redactable.label("detail_redactable"),
+                ProfileResult.pii_flag.label("pii_flag"),
+                ProfilingRun.job_execution_id.label("job_execution_id"),
+                JobExecution.started_at.label("started_at"),
+                ProfileResult.general_type.label("column_general_type"),
+                ProfileResult.db_data_type.label("column_db_data_type"),
+                ProfileResult.record_ct.label("column_record_ct"),
+                ProfileResult.null_value_ct.label("column_null_value_ct"),
+                ProfileResult.distinct_value_ct.label("column_distinct_value_ct"),
+            )
+            .join(HygieneIssueType, HygieneIssueType.id == cls.type_id)
+            .join(ProfilingRun, ProfilingRun.id == cls.profile_run_id)
+            .outerjoin(JobExecution, JobExecution.id == ProfilingRun.job_execution_id)
+            .outerjoin(
+                ProfileResult,
+                and_(
+                    ProfileResult.profile_run_id == cls.profile_run_id,
+                    ProfileResult.schema_name == cls.schema_name,
+                    ProfileResult.table_name == cls.table_name,
+                    ProfileResult.column_name == cls.column_name,
+                ),
+            )
+            .where(cls.id == issue_id, *clauses)
+        )
+        row = get_current_session().execute(query).mappings().first()
+        return HygieneIssueDetail(**row) if row else None
+
+    @classmethod
+    def update_disposition(cls, issue_id: UUID, disposition: str, *clauses) -> bool:
+        """Update disposition on a single hygiene issue, scoped by caller-supplied clauses.
+
+        Returns ``True`` if a row was updated, ``False`` if no row matched the id and
+        ``*clauses`` together.
+        """
+        stmt = update(cls).where(cls.id == issue_id, *clauses).values(disposition=disposition)
+        result = get_current_session().execute(stmt)
+        return result.rowcount > 0
+
     @classmethod
     def select_with_diff(
         cls, profiling_run_id: UUID, other_profiling_run_id: UUID | None, *where_clauses, limit: int | None = None
diff --git a/testgen/mcp/prompts/workflows.py b/testgen/mcp/prompts/workflows.py
index ff76b7b8..2317054b 100644
--- a/testgen/mcp/prompts/workflows.py
+++ b/testgen/mcp/prompts/workflows.py
@@ -77,6 +77,41 @@ def profiling_overview() -> str:
 """
 
 
+def hygiene_triage(table_group_id: str | None = None) -> str:
+    """Guided hygiene issue triage workflow — review hygiene issues and decide what to do.
+
+    Args:
+        table_group_id: Optional UUID of a table group to focus on.
+    """
+    intro = (
+        f"Focus on table group `{table_group_id}`."
+        if table_group_id
+        else "Pick a table group with confirmed hygiene issues."
+    )
+    tg = f"'{table_group_id}'" if table_group_id else "'...'"
+
+    steps: list[str] = []
+    if not table_group_id:
+        steps.append(
+            "Call `get_data_inventory()` to see projects and table groups, with profiling status per group."
+        )
+    steps.append(f"Call `list_profiling_summaries(table_group_id={tg})` to see hygiene issue counts per run.")
+    steps.append(f"Call `list_hygiene_issues(table_group_id={tg}, disposition='Confirmed')` for the issues to review.")
+    steps.append(
+        "For each top issue (ordered by priority), call `get_hygiene_issue(issue_id='...')` for full context — "
+        "issue type description, suggested action, column profile."
+    )
+    steps.append("For unfamiliar issue types, read `testgen://hygiene-issue-types` once for the reference table.")
+    steps.append(
+        "For each issue: explain what was found, then ask the user whether to dismiss the issue "
+        "(call `update_hygiene_issue(issue_id='...', disposition='Dismissed')`) or investigate further."
+    )
+    steps.append("Summarize the triage results and any patterns noted across the issues.")
+
+    numbered = "\n".join(f"{i + 1}. {step}" for i, step in enumerate(steps))
+    return f"Please triage hygiene issues. {intro}\n\n{numbered}\n"
+
+
 def compare_runs(test_suite: str | None = None) -> str:
     """Compare the two most recent test runs to identify regressions and improvements.
 
diff --git a/testgen/mcp/server.py b/testgen/mcp/server.py
index 19e3a539..ffe5d1c8 100644
--- a/testgen/mcp/server.py
+++ b/testgen/mcp/server.py
@@ -30,6 +30,9 @@
 Test types have specific, non-obvious meanings (e.g., Alpha_Trunc). Do not guess what a test checks.
 ALWAYS look them up using either the `testgen://test-types` resource or the `get_test_type()` tool.
 
+Hygiene issue types similarly have specific meanings. ALWAYS look them up using the
+`testgen://hygiene-issue-types` resource.
+
 INVESTIGATING FAILURES
 
 Use list_test_results to find failures, then get_source_data to see relevant data from the connected database.
@@ -90,6 +93,7 @@ def build_mcp_app(
     from testgen.mcp.prompts.workflows import (
         compare_runs,
         health_check,
+        hygiene_triage,
         investigate_failures,
         profiling_overview,
         table_health,
@@ -102,8 +106,19 @@ def build_mcp_app(
         run_profiling,
         run_tests,
     )
+    from testgen.mcp.tools.hygiene_issues import (
+        get_hygiene_issue,
+        list_hygiene_issues,
+        search_hygiene_issues,
+        update_hygiene_issue,
+    )
     from testgen.mcp.tools.profiling import get_table, list_column_profiles, list_profiling_summaries
-    from testgen.mcp.tools.reference import get_test_type, glossary_resource, test_types_resource
+    from testgen.mcp.tools.reference import (
+        get_test_type,
+        glossary_resource,
+        hygiene_issue_types_resource,
+        test_types_resource,
+    )
     from testgen.mcp.tools.source_data import get_source_data, get_source_data_query
     from testgen.mcp.tools.test_definitions import get_test, list_test_notes, list_test_types, list_tests
     from testgen.mcp.tools.test_results import (
@@ -166,9 +181,14 @@ def safe_prompt(fn):
     safe_tool(cancel_test_run)
     safe_tool(cancel_profiling_run)
     safe_tool(generate_tests)
+    safe_tool(list_hygiene_issues)
+    safe_tool(get_hygiene_issue)
+    safe_tool(search_hygiene_issues)
+    safe_tool(update_hygiene_issue)
 
-    # Resources (2)
+    # Resources
     safe_resource("testgen://test-types", test_types_resource)
+    safe_resource("testgen://hygiene-issue-types", hygiene_issue_types_resource)
     safe_resource("testgen://glossary", glossary_resource)
 
     # Prompts
@@ -177,6 +197,7 @@ def safe_prompt(fn):
     safe_prompt(table_health)
     safe_prompt(compare_runs)
     safe_prompt(profiling_overview)
+    safe_prompt(hygiene_triage)
 
     app = mcp.streamable_http_app()
     return app, mcp.session_manager
diff --git a/testgen/mcp/tools/common.py b/testgen/mcp/tools/common.py
index 35a4f64f..d6c2d3a3 100644
--- a/testgen/mcp/tools/common.py
+++ b/testgen/mcp/tools/common.py
@@ -2,6 +2,7 @@
 from uuid import UUID
 
 from testgen.common.date_service import parse_since
+from testgen.common.models.hygiene_issue import HygieneIssueType
 from testgen.common.models.table_group import TableGroup
 from testgen.common.models.test_definition import TestType
 from testgen.common.models.test_result import TestResultStatus
@@ -9,6 +10,13 @@
 from testgen.mcp.exceptions import MCPResourceNotAccessible, MCPUserError
 from testgen.mcp.permissions import get_project_permissions
 
+VALID_DQ_DIMENSIONS = {"Accuracy", "Completeness", "Consistency", "Recency", "Timeliness", "Uniqueness", "Validity"}
+# DB stores "Inactive"; user-facing label is "Muted".
+_DISPOSITION_USER_TO_DB = {"Confirmed": "Confirmed", "Dismissed": "Dismissed", "Muted": "Inactive"}
+_DISPOSITION_DB_TO_USER = {v: k for k, v in _DISPOSITION_USER_TO_DB.items()}
+_VALID_ISSUE_LIKELIHOODS = {"Definite", "Likely", "Possible"}
+_VALID_PII_RISKS = {"High", "Moderate"}
+
 
 def parse_uuid(value: str, label: str = "ID") -> UUID:
     try:
@@ -42,6 +50,46 @@ def parse_since_arg(value: str, label: str = "since", *, today: date | None = No
         raise MCPUserError(f"Invalid `{label}`: {err}") from err
 
 
+def parse_quality_dimension(value: str) -> str:
+    if value not in VALID_DQ_DIMENSIONS:
+        valid = ", ".join(sorted(VALID_DQ_DIMENSIONS))
+        raise MCPUserError(f"Invalid quality_dimension `{value}`. Valid values: {valid}")
+    return value
+
+
+def parse_disposition(value: str) -> str:
+    """Validate a user-facing disposition label and return the DB value.
+
+    Accepts ``Confirmed``, ``Dismissed``, ``Muted`` and returns ``Confirmed``,
+    ``Dismissed``, ``Inactive`` respectively (the DB encodes the legacy ``Inactive``).
+    """
+    if value not in _DISPOSITION_USER_TO_DB:
+        valid = ", ".join(sorted(_DISPOSITION_USER_TO_DB))
+        raise MCPUserError(f"Invalid disposition `{value}`. Valid values: {valid}")
+    return _DISPOSITION_USER_TO_DB[value]
+
+
+def format_disposition(value: str) -> str:
+    """Map a DB disposition value to its user-facing label (``Inactive`` → ``Muted``)."""
+    return _DISPOSITION_DB_TO_USER.get(value, value)
+
+
+def parse_issue_likelihood_list(values: list[str]) -> list[str]:
+    invalid = [v for v in values if v not in _VALID_ISSUE_LIKELIHOODS]
+    if invalid:
+        valid = ", ".join(sorted(_VALID_ISSUE_LIKELIHOODS))
+        raise MCPUserError(f"Invalid issue_likelihood values {invalid}. Valid values: {valid}")
+    return values
+
+
+def parse_pii_risk_list(values: list[str]) -> list[str]:
+    invalid = [v for v in values if v not in _VALID_PII_RISKS]
+    if invalid:
+        valid = ", ".join(sorted(_VALID_PII_RISKS))
+        raise MCPUserError(f"Invalid pii_risk values {invalid}. Valid values: {valid}")
+    return values
+
+
 def resolve_test_type(short_name: str) -> str:
     """Resolve a test type short name to its internal code."""
     matches = TestType.select_where(TestType.test_name_short == short_name)
@@ -52,6 +100,17 @@ def resolve_test_type(short_name: str) -> str:
     return matches[0].test_type
 
 
+def resolve_issue_type(name: str) -> str:
+    """Resolve a hygiene issue type human label to its internal id (case-sensitive exact match)."""
+    matches = HygieneIssueType.select_where(HygieneIssueType.name == name)
+    if not matches:
+        raise MCPUserError(
+            f"Unknown hygiene issue type: `{name}`. "
+            "Use the testgen://hygiene-issue-types resource to see available types."
+        )
+    return matches[0].id
+
+
 def format_page_info(total: int, page: int, limit: int) -> str:
     """Shared pagination summary line for MCP tool output."""
     if total == 0:
diff --git a/testgen/mcp/tools/hygiene_issues.py b/testgen/mcp/tools/hygiene_issues.py
new file mode 100644
index 00000000..7101587a
--- /dev/null
+++ b/testgen/mcp/tools/hygiene_issues.py
@@ -0,0 +1,405 @@
+from uuid import UUID
+
+from sqlalchemy import and_, or_
+from sqlalchemy.sql.elements import ColumnElement
+from sqlalchemy.sql.functions import func
+
+from testgen.common.models import with_database_session
+from testgen.common.models.hygiene_issue import HygieneIssue, HygieneIssueType
+from testgen.common.models.job_execution import JobExecution
+from testgen.common.models.profiling_run import ProfilingRun
+from testgen.common.models.table_group import TableGroup
+from testgen.common.pii_masking import PII_REDACTED
+from testgen.mcp.exceptions import MCPResourceNotAccessible, MCPUserError
+from testgen.mcp.permissions import get_project_permissions, mcp_permission
+from testgen.mcp.tools.common import (
+    format_disposition,
+    format_page_footer,
+    format_page_info,
+    parse_disposition,
+    parse_issue_likelihood_list,
+    parse_pii_risk_list,
+    parse_quality_dimension,
+    parse_since_arg,
+    parse_uuid,
+    resolve_issue_type,
+    resolve_table_group,
+    validate_limit,
+    validate_page,
+)
+from testgen.mcp.tools.markdown import MdDoc
+
+
+def _redact_detail(row, view_pii_codes: set[str]) -> str:
+    """Return the row's detail, redacted if the type is redactable, the column is PII,
+    and the caller lacks `view_pii` on the row's project. Mirrors `mask_hygiene_detail`.
+    """
+    if row.detail_redactable and row.pii_flag and row.project_code not in view_pii_codes:
+        return PII_REDACTED
+    return row.detail
+
+
+def _build_likelihood_clause(
+    issue_likelihood: list[str] | None,
+    pii_risk: list[str] | None,
+) -> ColumnElement[bool] | None:
+    """Construct the WHERE clause for the likelihood / pii_risk filter pair.
+
+    `issue_likelihood` matches `HygieneIssueType.likelihood` directly, so PII rows
+    (likelihood = "Potential PII") are excluded automatically. `pii_risk` matches
+    only PII rows by also requiring `likelihood == "Potential PII"`. Providing one
+    of the two filters therefore auto-excludes the other category.
+    """
+    likelihood_clause = HygieneIssueType.likelihood.in_(issue_likelihood) if issue_likelihood else None
+    pii_clause = (
+        and_(HygieneIssueType.likelihood == "Potential PII", HygieneIssue.priority.in_(pii_risk))
+        if pii_risk
+        else None
+    )
+    if likelihood_clause is None:
+        return pii_clause
+    if pii_clause is None:
+        return likelihood_clause
+    return or_(likelihood_clause, pii_clause)
+
+
+def _resolve_profile_run(
+    *,
+    job_execution_id: str | None,
+    table_group_id: str | None,
+) -> tuple[UUID, str]:
+    """Resolve the scope to a (profile_run_id, run_id_label) tuple.
+
+    Mutual-exclusion + at-least-one validation done by caller. Collapses missing
+    runs and inaccessible table groups into a single ``MCPResourceNotAccessible``.
+    """
+    if table_group_id:
+        tg = resolve_table_group(table_group_id)
+        if tg.last_complete_profile_run_id is None:
+            raise MCPUserError(f"No completed profiling runs found for table group `{table_group_id}`.")
+        run = ProfilingRun.get(tg.last_complete_profile_run_id)
+        run_id_label = str(run.job_execution_id) if run and run.job_execution_id else f"latest run for table group `{table_group_id}`"
+        return tg.last_complete_profile_run_id, run_id_label
+
+    job_uuid = parse_uuid(job_execution_id, "job_execution_id")
+    run = ProfilingRun.get_by_id_or_job(job_uuid)
+    perms = get_project_permissions()
+    tg = TableGroup.get(run.table_groups_id) if run else None
+    if run is None or tg is None or not perms.has_access(tg.project_code):
+        raise MCPResourceNotAccessible("Profiling run", job_execution_id)
+    return run.id, job_execution_id
+
+
+@with_database_session
+@mcp_permission("view")
+def list_hygiene_issues(
+    *,
+    job_execution_id: str | None = None,
+    table_group_id: str | None = None,
+    table_name: str | None = None,
+    column_name: str | None = None,
+    quality_dimension: str | None = None,
+    disposition: str | None = "Confirmed",
+    issue_likelihood: list[str] | None = None,
+    pii_risk: list[str] | None = None,
+    issue_type: str | None = None,
+    limit: int = 50,
+    page: int = 1,
+) -> str:
+    """List hygiene issues for a profiling run.
+
+    Provide either ``job_execution_id`` for a specific run, or ``table_group_id`` to list
+    the issues from its latest profiling run.
+
+    Args:
+        job_execution_id: UUID of a profiling run, e.g. from ``list_profiling_summaries``.
+        table_group_id: UUID of a table group. Resolves to the latest completed profiling run.
+            Mutually exclusive with ``job_execution_id``.
+        table_name: Filter by table name (exact match).
+        column_name: Filter by column name (exact match).
+        quality_dimension: Filter by data quality dimension ('Accuracy', 'Completeness',
+            'Consistency', 'Recency', 'Timeliness', 'Uniqueness', 'Validity').
+        disposition: Filter by disposition. Defaults to 'Confirmed'.
+            Valid values: 'Confirmed', 'Dismissed', 'Muted'.
+        issue_likelihood: Filter by issue likelihood. Values: 'Definite', 'Likely', 'Possible'.
+            Providing this filter auto-excludes PII issues; combine with ``pii_risk`` to include both.
+        pii_risk: Filter by PII risk level. Values: 'High', 'Moderate'.
+            Providing this filter auto-excludes regular issues.
+        issue_type: Filter by hygiene issue type (e.g. 'Personally Identifiable Information').
+            See ``testgen://hygiene-issue-types`` for the full list.
+        limit: Maximum number of issues per page (default 50, max 200).
+        page: Page number, starting from 1 (default 1).
+    """
+    if job_execution_id and table_group_id:
+        raise MCPUserError("Pass either `job_execution_id` or `table_group_id`, not both.")
+    if not job_execution_id and not table_group_id:
+        raise MCPUserError("Provide either `job_execution_id` or `table_group_id`.")
+    validate_page(page)
+    validate_limit(limit, 200)
+
+    perms = get_project_permissions()
+    profile_run_id, run_id_label = _resolve_profile_run(
+        job_execution_id=job_execution_id,
+        table_group_id=table_group_id,
+    )
+
+    issue_likelihood = parse_issue_likelihood_list(issue_likelihood) if issue_likelihood else None
+    pii_risk = parse_pii_risk_list(pii_risk) if pii_risk else None
+
+    clauses = [HygieneIssue.project_code.in_(perms.allowed_codes)]
+    disposition_value = parse_disposition(disposition or "Confirmed")
+    clauses.append(func.coalesce(HygieneIssue.disposition, "Confirmed") == disposition_value)
+    if table_name:
+        clauses.append(HygieneIssue.table_name == table_name)
+    if column_name:
+        clauses.append(HygieneIssue.column_name == column_name)
+    if quality_dimension:
+        clauses.append(HygieneIssueType.dq_dimension == parse_quality_dimension(quality_dimension))
+    if issue_type:
+        clauses.append(HygieneIssue.type_id == resolve_issue_type(issue_type))
+    likelihood_clause = _build_likelihood_clause(issue_likelihood, pii_risk)
+    if likelihood_clause is not None:
+        clauses.append(likelihood_clause)
+
+    rows, total = HygieneIssue.list_for_run(profile_run_id, *clauses, page=page, limit=limit)
+
+    doc = MdDoc()
+    doc.heading(1, f"Hygiene Issues for run `{run_id_label}`")
+    if not rows:
+        doc.text("_No hygiene issues match the supplied filters._")
+        return doc.render()
+    if info := format_page_info(total, page, limit):
+        doc.text(info)
+
+    view_pii_codes = set(perms.codes_allowed_to("view_pii"))
+    for r in rows:
+        location = f"`{r.column_name}` in `{r.table_name}`" if r.column_name else f"`{r.table_name}`"
+        doc.heading(2, f"[{r.priority or 'Unknown'}] {r.issue_type_name} on {location}")
+        doc.field("Issue ID", r.id, code=True)
+        doc.field("Issue Type", r.issue_type_name)
+        if r.dq_dimension:
+            doc.field("Quality Dimension", r.dq_dimension)
+        doc.field("Disposition", format_disposition(r.disposition))
+        if r.priority:
+            doc.field("Priority", r.priority)
+        if r.detail:
+            doc.field("Detail", _redact_detail(r, view_pii_codes))
+
+    if footer := format_page_footer(total, page, limit):
+        doc.text(footer)
+
+    return doc.render()
+
+
+@with_database_session
+@mcp_permission("disposition")
+def update_hygiene_issue(*, issue_id: str, disposition: str) -> str:
+    """Update the disposition of a hygiene issue (confirm, dismiss, or mute).
+
+    Args:
+        issue_id: UUID of the hygiene issue.
+        disposition: New disposition. Valid values: 'Confirmed', 'Dismissed', 'Muted'.
+    """
+    issue_uuid = parse_uuid(issue_id, "issue_id")
+    db_disposition = parse_disposition(disposition)
+    perms = get_project_permissions()
+
+    updated = HygieneIssue.update_disposition(
+        issue_uuid,
+        db_disposition,
+        HygieneIssue.project_code.in_(perms.allowed_codes),
+    )
+    if not updated:
+        raise MCPResourceNotAccessible("Hygiene issue", issue_id)
+
+    doc = MdDoc()
+    doc.text(f"Updated hygiene issue {MdDoc.code(issue_id)} disposition to **{disposition}**.")
+    return doc.render()
+
+
+@with_database_session
+@mcp_permission("view")
+def get_hygiene_issue(*, issue_id: str) -> str:
+    """Get full details of a specific hygiene issue.
+
+    Includes the issue type definition (description, suggested action), profiling run
+    metadata, and column-profile context (general type, null rate, distinct count).
+
+    Args:
+        issue_id: UUID of the hygiene issue, e.g. from ``list_hygiene_issues`` or
+            ``search_hygiene_issues``.
+    """
+    issue_uuid = parse_uuid(issue_id, "issue_id")
+    perms = get_project_permissions()
+
+    detail = HygieneIssue.get_with_context(
+        issue_uuid,
+        HygieneIssue.project_code.in_(perms.allowed_codes),
+    )
+    if detail is None:
+        raise MCPResourceNotAccessible("Hygiene issue", issue_id)
+
+    location = (
+        f"`{detail.column_name}` in `{detail.table_name}`" if detail.column_name else f"`{detail.table_name}`"
+    )
+
+    doc = MdDoc()
+    doc.heading(1, f"[{detail.priority or 'Unknown'}] {detail.issue_type_name} on {location}")
+    doc.field("Issue ID", detail.id, code=True)
+    doc.field("Issue Type", detail.issue_type_name)
+    doc.field("Schema", detail.schema_name)
+    doc.field("Table", detail.table_name)
+    if detail.column_name:
+        doc.field("Column", detail.column_name)
+    if detail.dq_dimension:
+        doc.field("Quality Dimension", detail.dq_dimension)
+    if detail.impact_dimension:
+        doc.field("Impact Dimension", detail.impact_dimension)
+    doc.field("Disposition", format_disposition(detail.disposition))
+    if detail.priority:
+        doc.field("Priority", detail.priority)
+    if detail.detail:
+        view_pii_codes = set(perms.codes_allowed_to("view_pii"))
+        doc.field("Detail", _redact_detail(detail, view_pii_codes))
+
+    if detail.suggested_action:
+        doc.heading(2, "Suggested Action")
+        doc.text(detail.suggested_action)
+
+    if detail.type_description:
+        doc.heading(2, "Issue Type Description")
+        doc.text(detail.type_description)
+
+    has_column_profile = detail.column_general_type is not None or detail.column_record_ct is not None
+    if has_column_profile:
+        doc.heading(2, "Column Profile")
+        if detail.column_general_type:
+            doc.field("General Type", detail.column_general_type)
+        if detail.column_db_data_type:
+            doc.field("DB Data Type", detail.column_db_data_type)
+        if detail.column_record_ct is not None:
+            doc.field("Records", detail.column_record_ct)
+        if detail.column_null_value_ct is not None:
+            doc.field("Nulls", detail.column_null_value_ct)
+            if detail.column_record_ct:
+                doc.field("Null Rate", f"{detail.column_null_value_ct / detail.column_record_ct:.2%}")
+        if detail.column_distinct_value_ct is not None:
+            doc.field("Distinct values", detail.column_distinct_value_ct)
+
+    doc.heading(2, "Profiling Run")
+    if detail.job_execution_id:
+        doc.field("Run", detail.job_execution_id, code=True)
+    doc.field("Run Date", detail.started_at)
+
+    return doc.render()
+
+
+@with_database_session
+@mcp_permission("view")
+def search_hygiene_issues(
+    *,
+    project_code: str | None = None,
+    table_group_id: str | None = None,
+    issue_type: str | None = None,
+    quality_dimension: str | None = None,
+    disposition: str | None = "Confirmed",
+    issue_likelihood: list[str] | None = None,
+    pii_risk: list[str] | None = None,
+    since: str | None = None,
+    table_name: str | None = None,
+    column_name: str | None = None,
+    limit: int = 50,
+    page: int = 1,
+) -> str:
+    """Search hygiene issues across profiling runs and table groups.
+
+    To drill into a single run, use ``list_hygiene_issues``.
+
+    Args:
+        project_code: Scope to a specific project.
+        table_group_id: UUID of a table group to scope to.
+        issue_type: Filter by hygiene issue type (e.g. 'Personally Identifiable Information').
+            See ``testgen://hygiene-issue-types`` for the full list.
+        quality_dimension: Filter by data quality dimension ('Accuracy', 'Completeness',
+            'Consistency', 'Recency', 'Timeliness', 'Uniqueness', 'Validity').
+        disposition: Filter by disposition. Defaults to 'Confirmed'.
+            Valid values: 'Confirmed', 'Dismissed', 'Muted'.
+        issue_likelihood: Filter by issue likelihood. Values: 'Definite', 'Likely', 'Possible'.
+            Providing this filter auto-excludes PII issues; combine with ``pii_risk`` to include both.
+        pii_risk: Filter by PII risk level. Values: 'High', 'Moderate'.
+            Providing this filter auto-excludes regular issues.
+        since: Include issues from runs that started since this point — e.g. '7 days',
+            '2 weeks', '2026-04-01'.
+        table_name: Filter by table name (exact match).
+        column_name: Filter by column name (exact match).
+        limit: Maximum number of issues per page (default 50, max 200).
+        page: Page number, starting from 1 (default 1).
+    """
+    validate_page(page)
+    validate_limit(limit, 200)
+
+    perms = get_project_permissions()
+    if project_code:
+        perms.verify_access(project_code, not_found=MCPResourceNotAccessible("Project", project_code))
+        project_codes = [project_code]
+    else:
+        project_codes = perms.allowed_codes
+
+    table_group_uuid = parse_uuid(table_group_id, "table_group_id") if table_group_id else None
+    since_date = parse_since_arg(since) if since else None
+    type_id = resolve_issue_type(issue_type) if issue_type else None
+    issue_likelihood = parse_issue_likelihood_list(issue_likelihood) if issue_likelihood else None
+    pii_risk = parse_pii_risk_list(pii_risk) if pii_risk else None
+
+    clauses = [
+        HygieneIssue.project_code.in_(project_codes),
+        func.coalesce(HygieneIssue.disposition, "Confirmed") == parse_disposition(disposition or "Confirmed"),
+    ]
+    if table_group_uuid is not None:
+        clauses.append(HygieneIssue.table_groups_id == table_group_uuid)
+    if quality_dimension:
+        clauses.append(HygieneIssueType.dq_dimension == parse_quality_dimension(quality_dimension))
+    if type_id:
+        clauses.append(HygieneIssue.type_id == type_id)
+    if table_name:
+        clauses.append(HygieneIssue.table_name == table_name)
+    if column_name:
+        clauses.append(HygieneIssue.column_name == column_name)
+    if since_date is not None:
+        clauses.append(JobExecution.started_at >= since_date)
+    likelihood_clause = _build_likelihood_clause(issue_likelihood, pii_risk)
+    if likelihood_clause is not None:
+        clauses.append(likelihood_clause)
+
+    rows, total = HygieneIssue.search(*clauses, page=page, limit=limit)
+
+    doc = MdDoc()
+    doc.heading(1, "Hygiene Issue Search")
+    if not rows:
+        doc.text("_No hygiene issues match the supplied filters._")
+        return doc.render()
+    if info := format_page_info(total, page, limit):
+        doc.text(info)
+
+    view_pii_codes = set(perms.codes_allowed_to("view_pii"))
+    for r in rows:
+        location = f"`{r.column_name}` in `{r.table_name}`" if r.column_name else f"`{r.table_name}`"
+        doc.heading(2, f"[{r.priority or 'Unknown'}] {r.issue_type_name} on {location}")
+        doc.field("Issue ID", r.id, code=True)
+        doc.field("Issue Type", r.issue_type_name)
+        doc.field("Table Group", r.table_groups_name)
+        if r.job_execution_id:
+            doc.field("Profiling Run", r.job_execution_id, code=True)
+        doc.field("Run Date", r.started_at)
+        if r.dq_dimension:
+            doc.field("Quality Dimension", r.dq_dimension)
+        if r.priority:
+            doc.field("Priority", r.priority)
+        doc.field("Disposition", format_disposition(r.disposition))
+        if r.detail:
+            doc.field("Detail", _redact_detail(r, view_pii_codes))
+
+    if footer := format_page_footer(total, page, limit):
+        doc.text(footer)
+
+    return doc.render()
diff --git a/testgen/mcp/tools/reference.py b/testgen/mcp/tools/reference.py
index 114cfe45..23217cdd 100644
--- a/testgen/mcp/tools/reference.py
+++ b/testgen/mcp/tools/reference.py
@@ -1,4 +1,5 @@
 from testgen.common.models import with_database_session
+from testgen.common.models.hygiene_issue import HygieneIssueType
 from testgen.common.models.test_definition import TestType
 from testgen.mcp.tools.markdown import MdDoc
 
@@ -80,6 +81,27 @@ def test_types_resource() -> str:
     return doc.render()
 
 
+@with_database_session
+def hygiene_issue_types_resource() -> str:
+    """Reference table of all hygiene issue types with their data quality dimensions, descriptions, and suggested actions."""
+    issue_types = HygieneIssueType.select_where(order_by=(HygieneIssueType.name,))
+
+    if not issue_types:
+        return "No hygiene issue types found."
+
+    doc = MdDoc()
+    doc.heading(1, "TestGen Hygiene Issue Types Reference")
+    doc.table(
+        headers=["Issue Type", "Quality Dimension", "Impact Dimension", "Description", "Suggested Action"],
+        rows=[
+            [it.name, it.dq_dimension, it.impact_dimension, it.description, it.suggested_action]
+            for it in issue_types
+        ],
+    )
+
+    return doc.render()
+
+
 def glossary_resource() -> str:
     """Glossary of TestGen concepts, entity hierarchy, result statuses, and quality dimensions."""
     return """\
diff --git a/testgen/mcp/tools/test_definitions.py b/testgen/mcp/tools/test_definitions.py
index 7863c331..3c520fe1 100644
--- a/testgen/mcp/tools/test_definitions.py
+++ b/testgen/mcp/tools/test_definitions.py
@@ -4,6 +4,7 @@
 from testgen.mcp.exceptions import MCPUserError
 from testgen.mcp.permissions import get_project_permissions, mcp_permission
 from testgen.mcp.tools.common import (
+    VALID_DQ_DIMENSIONS,
     format_page_footer,
     format_page_info,
     parse_uuid,
@@ -15,7 +16,6 @@
 
 _VALID_SCOPES = {"column", "table", "referential", "custom"}
 _VALID_IMPACT_DIMENSIONS = {"Reliability", "Conformance", "Regularity", "Usability"}
-_VALID_DQ_DIMENSIONS = {"Accuracy", "Completeness", "Consistency", "Recency", "Timeliness", "Uniqueness", "Validity"}
 
 
 @with_database_session
@@ -303,8 +303,8 @@ def list_test_types(
     if impact_dimension and impact_dimension not in _VALID_IMPACT_DIMENSIONS:
         valid = ", ".join(sorted(_VALID_IMPACT_DIMENSIONS))
         raise MCPUserError(f"Invalid impact_dimension `{impact_dimension}`. Valid values: {valid}")
-    if quality_dimension and quality_dimension not in _VALID_DQ_DIMENSIONS:
-        valid = ", ".join(sorted(_VALID_DQ_DIMENSIONS))
+    if quality_dimension and quality_dimension not in VALID_DQ_DIMENSIONS:
+        valid = ", ".join(sorted(VALID_DQ_DIMENSIONS))
         raise MCPUserError(f"Invalid quality_dimension `{quality_dimension}`. Valid values: {valid}")
 
     clauses = [TestType.active == "Y"]

From a4c557eafc1487031a07358553c761971413a80b Mon Sep 17 00:00:00 2001
From: Luis <ltrinidad@datakitchen.io>
Date: Tue, 5 May 2026 13:48:23 -0400
Subject: [PATCH 02/11] fix(ui): visual glitches in connections and test
 results

---
 .../frontend/js/pages/connections.js          | 26 ++++++++++++++++---
 testgen/ui/static/js/components/table.js      |  5 ++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/testgen/ui/components/frontend/js/pages/connections.js b/testgen/ui/components/frontend/js/pages/connections.js
index 09cb2950..26b63d6a 100644
--- a/testgen/ui/components/frontend/js/pages/connections.js
+++ b/testgen/ui/components/frontend/js/pages/connections.js
@@ -46,6 +46,21 @@ const Connections = (props) => {
     const updatedConnection = van.state(connection);
     const formState = van.state({dirty: false, valid: false});
 
+    // Build the wizard once on first appearance and keep the same instance
+    // across reruns so internal state (currentStepIndex, form values) is not
+    // reset. Re-creating it would send the user back to step 0 every time
+    // Streamlit reruns (e.g. after PreviewTableGroupClicked).
+    let wizardComponent;
+    const buildWizard = () => TableGroupWizard({ emit,
+        project_code: van.derive(() => getValue(props.setup_wizard)?.project_code),
+        table_group: van.derive(() => getValue(props.setup_wizard)?.table_group),
+        table_group_preview: van.derive(() => getValue(props.setup_wizard)?.table_group_preview),
+        steps: van.derive(() => getValue(props.setup_wizard)?.steps),
+        dialog: van.derive(() => getValue(props.setup_wizard)?.dialog),
+        results: van.derive(() => getValue(props.setup_wizard)?.results),
+        standard_cron_sample: van.derive(() => getValue(props.setup_wizard)?.standard_cron_sample),
+        monitor_cron_sample: van.derive(() => getValue(props.setup_wizard)?.monitor_cron_sample),
+    });
 
     return div(
         { id: wrapperId, 'data-testid': 'connections', class: 'flex-column fx-gap-4' },
@@ -110,9 +125,14 @@ const Connections = (props) => {
             },
         ),
         () => {
-            const wizardData = getValue(props.setup_wizard);
-            if (!wizardData) return div();
-            return TableGroupWizard(wizardData, emit);
+            // Once built, always return the same instance so the wizard stays
+            // mounted (its Dialog handles its own open/close via display:none).
+            // Returning a fresh node on rerun would unmount and break VanJS
+            // bindings inside the wizard.
+            if (!getValue(props.setup_wizard) && !wizardComponent) {
+                return '';
+            }
+            return wizardComponent ??= buildWizard();
         },
     );
 }
diff --git a/testgen/ui/static/js/components/table.js b/testgen/ui/static/js/components/table.js
index ac53acc1..8dbe9712 100644
--- a/testgen/ui/static/js/components/table.js
+++ b/testgen/ui/static/js/components/table.js
@@ -104,6 +104,11 @@ const Table = (options, rows) => {
         });
     });
     van.derive(() => {
+        // Depend on `rows` so this derive re-runs whenever the rows array changes.
+        // Without this, states added to `selectedRows` beyond its original length
+        // (e.g., when a filter expansion grows the rows list) have no listener
+        // registered here, and clicks on those new rows silently drop.
+        getValue(rows);
         const selectedRows_ = [];
         for (let i = 0; i < selectedRows.length; i++) {
             if (selectedRows[i].val) {

From a11176426bbfc850be82b34f8c62299d32c52a6c Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Tue, 5 May 2026 00:13:00 -0400
Subject: [PATCH 03/11] fix(bigquery): error on freshness monitor when
 tolerance is null

---
 .../dbsetup_test_types/test_types_Freshness_Trend.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/testgen/template/dbsetup_test_types/test_types_Freshness_Trend.yaml b/testgen/template/dbsetup_test_types/test_types_Freshness_Trend.yaml
index 1ad7fae4..ba60e7c5 100644
--- a/testgen/template/dbsetup_test_types/test_types_Freshness_Trend.yaml
+++ b/testgen/template/dbsetup_test_types/test_types_Freshness_Trend.yaml
@@ -66,7 +66,7 @@ test_types:
         fingerprint AS result_measure,
         CASE
           -- Training mode: tolerances not yet calculated
-          WHEN {LOWER_TOLERANCE} IS NULL AND {UPPER_TOLERANCE} IS NULL THEN -1
+          WHEN CAST({LOWER_TOLERANCE} AS FLOAT64) IS NULL AND CAST({UPPER_TOLERANCE} AS FLOAT64) IS NULL THEN -1
           -- No change and excluded day: suppress
           WHEN fingerprint = '{BASELINE_VALUE}' AND {IS_EXCLUDED_DAY} = 1 THEN 1
           -- No change, beyond time range (business time): LATE
@@ -75,14 +75,14 @@ test_types:
           -- Table changed outside time range (business time): UNEXPECTED
           WHEN fingerprint <> '{BASELINE_VALUE}'
             AND NOT (interval_minutes - {EXCLUDED_MINUTES})
-              BETWEEN {LOWER_TOLERANCE} AND {UPPER_TOLERANCE} THEN 0
+              BETWEEN CAST({LOWER_TOLERANCE} AS FLOAT64) AND CAST({UPPER_TOLERANCE} AS FLOAT64) THEN 0
           ELSE 1
         END AS result_code,
         'Table update detected: ' || CASE WHEN fingerprint <> '{BASELINE_VALUE}' THEN 'Yes' ELSE 'No' END
           || CASE
-            WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) BETWEEN {LOWER_TOLERANCE} AND {UPPER_TOLERANCE} THEN '. On time.'
-            WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) < {LOWER_TOLERANCE} THEN '. Earlier than expected.'
-            WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) > {UPPER_TOLERANCE} THEN '. Later than expected.'
+            WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) BETWEEN CAST({LOWER_TOLERANCE} AS FLOAT64) AND CAST({UPPER_TOLERANCE} AS FLOAT64) THEN '. On time.'
+            WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) < CAST({LOWER_TOLERANCE} AS FLOAT64) THEN '. Earlier than expected.'
+            WHEN fingerprint <> '{BASELINE_VALUE}' AND (interval_minutes - {EXCLUDED_MINUTES}) > CAST({UPPER_TOLERANCE} AS FLOAT64) THEN '. Later than expected.'
             WHEN fingerprint = '{BASELINE_VALUE}' AND {IS_EXCLUDED_DAY} = 0 AND (interval_minutes - {EXCLUDED_MINUTES}) > {THRESHOLD_VALUE} THEN '. Late.'
             ELSE ''
           END AS result_message,

From 61f8d0eb74491ba4f5fa3510064176e1d92998d3 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Tue, 5 May 2026 00:28:04 -0400
Subject: [PATCH 04/11] fix(logs): suppress streamlit warnings

---
 testgen/__main__.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/testgen/__main__.py b/testgen/__main__.py
index deffaeec..a2ac98da 100644
--- a/testgen/__main__.py
+++ b/testgen/__main__.py
@@ -1,3 +1,40 @@
+# Silence streamlit's "missing ScriptRunContext" / "No runtime found" /
+# "Session state does not function" warnings, which fire whenever streamlit-
+# decorated code runs outside an active script run (our CLI, scheduler, server,
+# and any import that touches @st.cache_data). Must run before the first
+# streamlit-using import, so it sits at the top of the module.
+#
+# We replace ``set_log_level`` itself, after seeding it to "error". Streamlit's
+# own ``_update_logger`` callback fires on config parse and would otherwise
+# downgrade us back to "info"; the cap floors any later call at ERROR.
+def _silence_streamlit_logs() -> None:
+    import logging as _logging
+
+    try:
+        from streamlit import logger as _st_logger
+    except ImportError:
+        return
+
+    _original = _st_logger.set_log_level
+    _original("error")
+
+    def _capped(level):
+        if isinstance(level, str):
+            try:
+                level_num = getattr(_logging, level.upper())
+            except AttributeError:
+                _original(level)
+                return
+        else:
+            level_num = level
+        _original(max(level_num, _logging.ERROR))
+
+    _st_logger.set_log_level = _capped
+
+
+_silence_streamlit_logs()
+
+
 import base64
 import importlib
 import logging
@@ -895,6 +932,7 @@ def init_ui():
             "run",
             app_file,
             "--browser.gatherUsageStats=false",
+            f"--logger.level={'debug' if settings.IS_DEBUG else 'error'}",
             "--client.showErrorDetails=none",
             "--client.toolbarMode=minimal",
             "--server.enableStaticServing=true",

From e91277e903c8f3dc7c067f868dc227560fa9beed Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Tue, 5 May 2026 13:09:29 -0400
Subject: [PATCH 05/11] fix(standalone): command argument precedence - add log
 path to config.env

---
 testgen/__main__.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/testgen/__main__.py b/testgen/__main__.py
index a2ac98da..1841636a 100644
--- a/testgen/__main__.py
+++ b/testgen/__main__.py
@@ -603,7 +603,7 @@ def generate_secret(length: int = 12) -> str:
 
     # Persist caller-supplied runtime overrides (ports, TLS) so they apply to
     # subsequent `testgen run-app` invocations.
-    persisted_env_vars = ("TG_UI_PORT", "TG_API_PORT", "SSL_CERT_FILE", "SSL_KEY_FILE")
+    persisted_env_vars = ("TG_UI_PORT", "TG_API_PORT", "TESTGEN_LOG_FILE_PATH", "SSL_CERT_FILE", "SSL_KEY_FILE")
     persisted_lines = [f"{name}={os.environ[name]}" for name in persisted_env_vars if os.environ.get(name)]
     if persisted_lines:
         config_lines.extend(["", "# Runtime overrides from installer", *persisted_lines])
@@ -611,6 +611,13 @@ def generate_secret(length: int = 12) -> str:
     config_path.write_text("\n".join(config_lines) + "\n")
     click.echo(f"Config written to {config_path}")
 
+    # `getenv` resolves env vars before config.env, so a pre-existing
+    # TESTGEN_USERNAME / TESTGEN_PASSWORD in the shell would override the
+    # CLI-supplied values and get seeded into the DB. Force the CLI args
+    # to win for the rest of this process.
+    os.environ["TESTGEN_USERNAME"] = username
+    os.environ["TESTGEN_PASSWORD"] = password
+
     # Reload settings — the module was already evaluated at import time
     # before the config file existed.  Reloading re-reads the new file
     # and re-evaluates all module-level variables.

From e28d5a85e03124b1e198f86993523620ee50f3be Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Tue, 5 May 2026 17:17:06 -0400
Subject: [PATCH 06/11] fix(standalone): honor config.env for ports, hosts, and
 base URLs

Several settings called os.getenv directly instead of the local getenv
helper that also reads from ~/.testgen/config.env. As a result, values
written by `testgen standalone-setup` (e.g. TG_UI_PORT) were silently
ignored by `testgen run-app`. Also derive UI_BASE_URL from UI_PORT
instead of Streamlit's STREAMLIT_SERVER_PORT, matching the API side.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 testgen/settings.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/testgen/settings.py b/testgen/settings.py
index 0c69f600..94c4e002 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -474,7 +474,7 @@ def _ssl_files_present() -> bool:
     return bool(SSL_CERT_FILE) and os.path.isfile(SSL_CERT_FILE) and bool(SSL_KEY_FILE) and os.path.isfile(SSL_KEY_FILE)
 
 
-_ui_tls_env = os.getenv("TG_UI_TLS_ENABLED", "").lower()
+_ui_tls_env = getenv("TG_UI_TLS_ENABLED", "").lower()
 UI_TLS_ENABLED: bool = _ui_tls_env in ("yes", "true") if _ui_tls_env else _ssl_files_present()
 """
 Enable TLS for the Streamlit UI server.
@@ -484,7 +484,7 @@ def _ssl_files_present() -> bool:
 defaults to: auto-detect
 """
 
-_api_tls_env = os.getenv("TG_API_TLS_ENABLED", "").lower()
+_api_tls_env = getenv("TG_API_TLS_ENABLED", "").lower()
 API_TLS_ENABLED: bool = _api_tls_env in ("yes", "true") if _api_tls_env else _ssl_files_present()
 """
 Enable TLS for the API/MCP server (uvicorn).
@@ -568,7 +568,7 @@ def _ssl_files_present() -> bool:
 defaults to: `yes`
 """
 
-UI_PORT: int = int(os.getenv("TG_UI_PORT", "8501"))
+UI_PORT: int = int(getenv("TG_UI_PORT", "8501"))
 """
 Port for the UI server.
 
@@ -576,7 +576,7 @@ def _ssl_files_present() -> bool:
 defaults to: `8501`
 """
 
-API_PORT: int = int(os.getenv("TG_API_PORT", "8530"))
+API_PORT: int = int(getenv("TG_API_PORT", "8530"))
 """
 Port for the API server.
 
@@ -584,7 +584,7 @@ def _ssl_files_present() -> bool:
 defaults to: `8530`
 """
 
-API_HOST: str = os.getenv("TG_API_HOST", "0.0.0.0")  # noqa: S104
+API_HOST: str = getenv("TG_API_HOST", "0.0.0.0")  # noqa: S104
 """
 Host for the API server.
 
@@ -597,7 +597,7 @@ def _default_base_url() -> str:
     return f"{scheme}://localhost:{API_PORT}"
 
 
-BASE_URL: str = os.getenv("TG_BASE_URL", "") or _default_base_url()
+BASE_URL: str = getenv("TG_BASE_URL", "") or _default_base_url()
 """
 Externally-reachable base URL for the API/MCP/OAuth server.
 
@@ -608,14 +608,13 @@ def _default_base_url() -> str:
 
 def _default_ui_base_url() -> str:
     scheme = "https" if UI_TLS_ENABLED else "http"
-    port = os.getenv("STREAMLIT_SERVER_PORT", "8501")
-    return f"{scheme}://localhost:{port}"
+    return f"{scheme}://localhost:{UI_PORT}"
 
 
-UI_BASE_URL: str = os.getenv("TG_UI_BASE_URL", "") or _default_ui_base_url()
+UI_BASE_URL: str = getenv("TG_UI_BASE_URL", "") or _default_ui_base_url()
 """
 Externally-reachable base URL for the Streamlit UI (used in email links, PDFs).
 
 from env variable: `TG_UI_BASE_URL`
-defaults to: computed from UI_TLS_ENABLED and STREAMLIT_SERVER_PORT
+defaults to: computed from UI_TLS_ENABLED and UI_PORT
 """

From 3c6d005ce6b86d6916695ac6fa4f6a98b71d09ff Mon Sep 17 00:00:00 2001
From: Ricardo Boni <rboni@datakitchen.io>
Date: Mon, 4 May 2026 20:02:17 -0400
Subject: [PATCH 07/11] fix(mcp): hygiene issues review feedback (TG-1029)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round of review fixes applied on top of the initial implementation:

* Drop the legacy `profile_run_id` from the MCP layer — model methods take
  a `job_execution_id` and JOIN `ProfilingRun` to filter. New
  `ProfilingRun.get_latest_complete_je_id_for_table_group` avoids reading the
  `table_groups.last_complete_profile_run_id` cache (which still points at the
  internal run PK; schema-level cleanup tracked separately).
* Trust the run/JE backfill migration — drop `if x is None` guards and the
  descriptive-string fallback in `_resolve_profile_run`.
* StrEnums for fixed string sets: `Disposition`, `IssueLikelihood`, `PiiRisk`
  (in `common/models/hygiene_issue.py`) and `QualityDimension` (new shared
  `common/enums.py`). Parsers return enum types; coalesce defaults reference
  enum members; sentinel sets dropped.
* Tighten labels: heading uses "for profiling run"; `get_hygiene_issue` no
  longer triples "Run / Profiling Run / Run Date".
* Empty states render with title + italic marker.
* `dq_prevalence` removed from output and dataclasses (no precedent label,
  unhelpful score-engine internal).
---
 testgen/common/enums.py                     |  29 +
 testgen/common/models/hygiene_issue.py      |  45 +-
 testgen/common/models/profiling_run.py      |  17 +
 testgen/mcp/prompts/workflows.py            |   5 +-
 testgen/mcp/tools/common.py                 |  96 ++-
 testgen/mcp/tools/hygiene_issues.py         |  68 +-
 testgen/mcp/tools/reference.py              |  37 +-
 testgen/mcp/tools/test_definitions.py       |  25 +-
 tests/unit/mcp/test_model_hygiene_issue.py  | 362 ++++++++
 tests/unit/mcp/test_model_profiling_run.py  |  65 ++
 tests/unit/mcp/test_prompts_workflows.py    |  55 ++
 tests/unit/mcp/test_tools_common.py         | 193 ++++-
 tests/unit/mcp/test_tools_hygiene_issues.py | 907 ++++++++++++++++++++
 tests/unit/mcp/test_tools_reference.py      |  83 +-
 14 files changed, 1893 insertions(+), 94 deletions(-)
 create mode 100644 testgen/common/enums.py
 create mode 100644 tests/unit/mcp/test_model_hygiene_issue.py
 create mode 100644 tests/unit/mcp/test_model_profiling_run.py
 create mode 100644 tests/unit/mcp/test_prompts_workflows.py
 create mode 100644 tests/unit/mcp/test_tools_hygiene_issues.py

diff --git a/testgen/common/enums.py b/testgen/common/enums.py
new file mode 100644
index 00000000..94d08a37
--- /dev/null
+++ b/testgen/common/enums.py
@@ -0,0 +1,29 @@
+"""Shared enums used across multiple models, services, and surfaces.
+
+Add an enum here when its values are referenced by more than one model file or by
+both the model layer and an outer surface (MCP, API, UI). Single-model enums live
+in their model file.
+"""
+from enum import StrEnum
+
+
+class QualityDimension(StrEnum):
+    """Stored ``dq_dimension`` values shared by ``profile_anomaly_types`` and ``test_types``.
+    Surfaced to users as "Quality Dimension"."""
+    ACCURACY = "Accuracy"
+    COMPLETENESS = "Completeness"
+    CONSISTENCY = "Consistency"
+    RECENCY = "Recency"
+    TIMELINESS = "Timeliness"
+    UNIQUENESS = "Uniqueness"
+    VALIDITY = "Validity"
+
+
+class ImpactDimension(StrEnum):
+    """Stored ``impact_dimension`` values shared by ``profile_anomaly_types`` /
+    ``profile_anomaly_results`` and ``test_types``. The primary dimension breakdown
+    used by scorecards."""
+    RELIABILITY = "Reliability"
+    CONFORMANCE = "Conformance"
+    REGULARITY = "Regularity"
+    USABILITY = "Usability"
diff --git a/testgen/common/models/hygiene_issue.py b/testgen/common/models/hygiene_issue.py
index a950c5a9..497c8180 100644
--- a/testgen/common/models/hygiene_issue.py
+++ b/testgen/common/models/hygiene_issue.py
@@ -2,6 +2,7 @@
 from collections.abc import Iterable
 from dataclasses import dataclass
 from datetime import datetime
+from enum import StrEnum
 from typing import Self
 from uuid import UUID, uuid4
 
@@ -21,6 +22,28 @@
 PII_RISK_RE = re.compile(r"Risk: (MODERATE|HIGH),")
 
 
+class Disposition(StrEnum):
+    """Stored disposition values for ``profile_anomaly_results.disposition`` and
+    ``test_results.disposition``. The user-facing label for ``INACTIVE`` is "Muted"."""
+    CONFIRMED = "Confirmed"
+    DISMISSED = "Dismissed"
+    INACTIVE = "Inactive"
+
+
+class IssueLikelihood(StrEnum):
+    """Stored ``profile_anomaly_types.issue_likelihood`` values."""
+    DEFINITE = "Definite"
+    LIKELY = "Likely"
+    POSSIBLE = "Possible"
+    POTENTIAL_PII = "Potential PII"
+
+
+class PiiRisk(StrEnum):
+    """Risk level extracted from PII issue ``detail`` strings via ``priority`` hybrid."""
+    HIGH = "High"
+    MODERATE = "Moderate"
+
+
 @dataclass
 class IssueLikelihoodCounts:
     """Counts of hygiene issues by likelihood category, with dismissed/inactive separated."""
@@ -51,6 +74,7 @@ class HygieneIssueListRow:
     schema_name: str
     table_name: str
     column_name: str
+    impact_dimension: str | None
     dq_dimension: str | None
     disposition: str
     priority: str | None
@@ -72,6 +96,7 @@ class HygieneIssueSearchRow:
     schema_name: str
     table_name: str
     column_name: str
+    impact_dimension: str | None
     dq_dimension: str | None
     disposition: str
     priority: str | None
@@ -92,7 +117,6 @@ class HygieneIssueDetail:
     schema_name: str
     table_name: str
     column_name: str
-    db_data_type: str | None
     dq_dimension: str | None
     impact_dimension: str | None
     disposition: str
@@ -150,13 +174,12 @@ class HygieneIssue(Entity):
     schema_name: str = Column(String, nullable=False)
     table_name: str = Column(String, nullable=False)
     column_name: str = Column(String, nullable=False)
-    db_data_type: str = Column(String)
 
     detail: str = Column(String, nullable=False)
     disposition: str = Column(String)
     impact_dimension: str = Column(String)
 
-    # Unmapped: column_type, dq_prevalence.
+    # Unmapped: column_type, db_data_type, dq_prevalence.
 
     @hybrid_property
     def priority(self):
@@ -237,12 +260,12 @@ def _priority_order(cls):
     @classmethod
     def list_for_run(
         cls,
-        profile_run_id: UUID,
+        job_execution_id: UUID,
         *clauses,
         page: int = 1,
         limit: int = 50,
     ) -> tuple[list[HygieneIssueListRow], int]:
-        """Paginated hygiene issues for a single profiling run.
+        """Paginated hygiene issues for a single profiling run, scoped by its job_execution_id.
 
         Caller-supplied ``*clauses`` carry every WHERE filter (project scoping, disposition,
         likelihood / pii_risk, table / column / dq_dimension / issue_type filters).
@@ -255,14 +278,16 @@ def list_for_run(
                 cls.schema_name.label("schema_name"),
                 cls.table_name.label("table_name"),
                 cls.column_name.label("column_name"),
+                cls.impact_dimension.label("impact_dimension"),
                 HygieneIssueType.dq_dimension.label("dq_dimension"),
-                func.coalesce(cls.disposition, "Confirmed").label("disposition"),
+                func.coalesce(cls.disposition, Disposition.CONFIRMED).label("disposition"),
                 cls.priority.label("priority"),
                 cls.detail.label("detail"),
                 HygieneIssueType.detail_redactable.label("detail_redactable"),
                 ProfileResult.pii_flag.label("pii_flag"),
             )
             .join(HygieneIssueType, HygieneIssueType.id == cls.type_id)
+            .join(ProfilingRun, ProfilingRun.id == cls.profile_run_id)
             .outerjoin(
                 ProfileResult,
                 and_(
@@ -272,7 +297,7 @@ def list_for_run(
                     ProfileResult.column_name == cls.column_name,
                 ),
             )
-            .where(cls.profile_run_id == profile_run_id, *clauses)
+            .where(ProfilingRun.job_execution_id == job_execution_id, *clauses)
             .order_by(cls._priority_order(), cls.table_name, cls.column_name, cls.id)
         )
         return cls._paginate(query, page=page, limit=limit, data_class=HygieneIssueListRow)
@@ -301,8 +326,9 @@ def search(
                 cls.schema_name.label("schema_name"),
                 cls.table_name.label("table_name"),
                 cls.column_name.label("column_name"),
+                cls.impact_dimension.label("impact_dimension"),
                 HygieneIssueType.dq_dimension.label("dq_dimension"),
-                func.coalesce(cls.disposition, "Confirmed").label("disposition"),
+                func.coalesce(cls.disposition, Disposition.CONFIRMED).label("disposition"),
                 cls.priority.label("priority"),
                 cls.detail.label("detail"),
                 HygieneIssueType.detail_redactable.label("detail_redactable"),
@@ -347,10 +373,9 @@ def get_with_context(cls, issue_id: UUID, *clauses) -> HygieneIssueDetail | None
                 cls.schema_name.label("schema_name"),
                 cls.table_name.label("table_name"),
                 cls.column_name.label("column_name"),
-                cls.db_data_type.label("db_data_type"),
                 HygieneIssueType.dq_dimension.label("dq_dimension"),
                 cls.impact_dimension.label("impact_dimension"),
-                func.coalesce(cls.disposition, "Confirmed").label("disposition"),
+                func.coalesce(cls.disposition, Disposition.CONFIRMED).label("disposition"),
                 cls.priority.label("priority"),
                 cls.detail.label("detail"),
                 HygieneIssueType.detail_redactable.label("detail_redactable"),
diff --git a/testgen/common/models/profiling_run.py b/testgen/common/models/profiling_run.py
index 18485527..95538203 100644
--- a/testgen/common/models/profiling_run.py
+++ b/testgen/common/models/profiling_run.py
@@ -162,6 +162,23 @@ def get_latest_run(cls, project_code: str) -> LatestProfilingRun | None:
             return LatestProfilingRun(str(result["id"]), result["run_time"])
         return None
 
+    @classmethod
+    def get_latest_complete_je_id_for_table_group(cls, table_groups_id: UUID) -> UUID | None:
+        """Return the ``job_execution_id`` of the latest completed profiling run for a table group.
+
+        Computed live from ``profiling_runs`` joined to ``job_executions`` — does not read the
+        legacy ``table_groups.last_complete_profile_run_id`` cache, which points at the internal
+        run PK rather than the JE id.
+        """
+        query = (
+            select(cls.job_execution_id)
+            .join(JobExecution, cls.job_execution_id == JobExecution.id)
+            .where(cls.table_groups_id == table_groups_id, JobExecution.status == JobStatus.COMPLETED)
+            .order_by(desc(JobExecution.started_at))
+            .limit(1)
+        )
+        return get_current_session().scalar(query)
+
     @classmethod
     @st.cache_data(show_spinner=False, hash_funcs=ENTITY_HASH_FUNCS)
     def select_minimal_where(
diff --git a/testgen/mcp/prompts/workflows.py b/testgen/mcp/prompts/workflows.py
index 2317054b..4201bf75 100644
--- a/testgen/mcp/prompts/workflows.py
+++ b/testgen/mcp/prompts/workflows.py
@@ -95,7 +95,6 @@ def hygiene_triage(table_group_id: str | None = None) -> str:
         steps.append(
             "Call `get_data_inventory()` to see projects and table groups, with profiling status per group."
         )
-    steps.append(f"Call `list_profiling_summaries(table_group_id={tg})` to see hygiene issue counts per run.")
     steps.append(f"Call `list_hygiene_issues(table_group_id={tg}, disposition='Confirmed')` for the issues to review.")
     steps.append(
         "For each top issue (ordered by priority), call `get_hygiene_issue(issue_id='...')` for full context — "
@@ -103,8 +102,8 @@ def hygiene_triage(table_group_id: str | None = None) -> str:
     )
     steps.append("For unfamiliar issue types, read `testgen://hygiene-issue-types` once for the reference table.")
     steps.append(
-        "For each issue: explain what was found, then ask the user whether to dismiss the issue "
-        "(call `update_hygiene_issue(issue_id='...', disposition='Dismissed')`) or investigate further."
+        "For each issue: explain what was found, then help the user decide a disposition — **Confirmed**, **Dismissed**, or **Muted**. "
+        "Apply via `update_hygiene_issue(issue_id='...', disposition='...')`, or leave it open for further investigation."
     )
     steps.append("Summarize the triage results and any patterns noted across the issues.")
 
diff --git a/testgen/mcp/tools/common.py b/testgen/mcp/tools/common.py
index d6c2d3a3..9c0c3096 100644
--- a/testgen/mcp/tools/common.py
+++ b/testgen/mcp/tools/common.py
@@ -2,7 +2,8 @@
 from uuid import UUID
 
 from testgen.common.date_service import parse_since
-from testgen.common.models.hygiene_issue import HygieneIssueType
+from testgen.common.enums import ImpactDimension, QualityDimension
+from testgen.common.models.hygiene_issue import Disposition, HygieneIssueType, IssueLikelihood, PiiRisk
 from testgen.common.models.table_group import TableGroup
 from testgen.common.models.test_definition import TestType
 from testgen.common.models.test_result import TestResultStatus
@@ -10,12 +11,15 @@
 from testgen.mcp.exceptions import MCPResourceNotAccessible, MCPUserError
 from testgen.mcp.permissions import get_project_permissions
 
-VALID_DQ_DIMENSIONS = {"Accuracy", "Completeness", "Consistency", "Recency", "Timeliness", "Uniqueness", "Validity"}
-# DB stores "Inactive"; user-facing label is "Muted".
-_DISPOSITION_USER_TO_DB = {"Confirmed": "Confirmed", "Dismissed": "Dismissed", "Muted": "Inactive"}
-_DISPOSITION_DB_TO_USER = {v: k for k, v in _DISPOSITION_USER_TO_DB.items()}
-_VALID_ISSUE_LIKELIHOODS = {"Definite", "Likely", "Possible"}
-_VALID_PII_RISKS = {"High", "Moderate"}
+# User-facing label for ``Disposition.INACTIVE`` is "Muted" — accept that label on input.
+_DISPOSITION_USER_TO_DB: dict[str, Disposition] = {
+    "Confirmed": Disposition.CONFIRMED,
+    "Dismissed": Disposition.DISMISSED,
+    "Muted": Disposition.INACTIVE,
+}
+_DISPOSITION_DB_TO_USER: dict[Disposition, str] = {v: k for k, v in _DISPOSITION_USER_TO_DB.items()}
+# Filter accepts only the regular likelihoods — PII rows are filtered separately via ``pii_risk``.
+_FILTERABLE_LIKELIHOODS = frozenset({IssueLikelihood.DEFINITE, IssueLikelihood.LIKELY, IssueLikelihood.POSSIBLE})
 
 
 def parse_uuid(value: str, label: str = "ID") -> UUID:
@@ -50,44 +54,74 @@ def parse_since_arg(value: str, label: str = "since", *, today: date | None = No
         raise MCPUserError(f"Invalid `{label}`: {err}") from err
 
 
-def parse_quality_dimension(value: str) -> str:
-    if value not in VALID_DQ_DIMENSIONS:
-        valid = ", ".join(sorted(VALID_DQ_DIMENSIONS))
-        raise MCPUserError(f"Invalid quality_dimension `{value}`. Valid values: {valid}")
-    return value
+def parse_impact_dimension(value: str) -> ImpactDimension:
+    try:
+        return ImpactDimension(value)
+    except ValueError as err:
+        valid = ", ".join(d.value for d in ImpactDimension)
+        raise MCPUserError(f"Invalid impact_dimension `{value}`. Valid values: {valid}") from err
+
 
+def parse_quality_dimension(value: str) -> QualityDimension:
+    try:
+        return QualityDimension(value)
+    except ValueError as err:
+        valid = ", ".join(d.value for d in QualityDimension)
+        raise MCPUserError(f"Invalid quality_dimension `{value}`. Valid values: {valid}") from err
 
-def parse_disposition(value: str) -> str:
-    """Validate a user-facing disposition label and return the DB value.
 
-    Accepts ``Confirmed``, ``Dismissed``, ``Muted`` and returns ``Confirmed``,
-    ``Dismissed``, ``Inactive`` respectively (the DB encodes the legacy ``Inactive``).
+def parse_disposition(value: str) -> Disposition:
+    """Validate a user-facing disposition label and return the stored ``Disposition``.
+
+    Accepts ``Confirmed``, ``Dismissed``, ``Muted`` (user-facing labels). The DB encodes
+    ``INACTIVE`` for "Muted" — see ``Disposition``.
     """
-    if value not in _DISPOSITION_USER_TO_DB:
+    db_value = _DISPOSITION_USER_TO_DB.get(value)
+    if db_value is None:
         valid = ", ".join(sorted(_DISPOSITION_USER_TO_DB))
         raise MCPUserError(f"Invalid disposition `{value}`. Valid values: {valid}")
-    return _DISPOSITION_USER_TO_DB[value]
-
+    return db_value
 
-def format_disposition(value: str) -> str:
-    """Map a DB disposition value to its user-facing label (``Inactive`` → ``Muted``)."""
-    return _DISPOSITION_DB_TO_USER.get(value, value)
 
-
-def parse_issue_likelihood_list(values: list[str]) -> list[str]:
-    invalid = [v for v in values if v not in _VALID_ISSUE_LIKELIHOODS]
+def format_disposition(value: Disposition | str) -> str:
+    """Map a stored disposition to its user-facing label (``INACTIVE`` → "Muted")."""
+    try:
+        return _DISPOSITION_DB_TO_USER[Disposition(value)]
+    except ValueError:
+        return str(value)
+
+
+def parse_issue_likelihood_list(values: list[str]) -> list[IssueLikelihood]:
+    parsed: list[IssueLikelihood] = []
+    invalid: list[str] = []
+    for value in values:
+        try:
+            likelihood = IssueLikelihood(value)
+        except ValueError:
+            invalid.append(value)
+            continue
+        if likelihood not in _FILTERABLE_LIKELIHOODS:
+            invalid.append(value)
+            continue
+        parsed.append(likelihood)
     if invalid:
-        valid = ", ".join(sorted(_VALID_ISSUE_LIKELIHOODS))
+        valid = ", ".join(sorted(v.value for v in _FILTERABLE_LIKELIHOODS))
         raise MCPUserError(f"Invalid issue_likelihood values {invalid}. Valid values: {valid}")
-    return values
+    return parsed
 
 
-def parse_pii_risk_list(values: list[str]) -> list[str]:
-    invalid = [v for v in values if v not in _VALID_PII_RISKS]
+def parse_pii_risk_list(values: list[str]) -> list[PiiRisk]:
+    parsed: list[PiiRisk] = []
+    invalid: list[str] = []
+    for value in values:
+        try:
+            parsed.append(PiiRisk(value))
+        except ValueError:
+            invalid.append(value)
     if invalid:
-        valid = ", ".join(sorted(_VALID_PII_RISKS))
+        valid = ", ".join(r.value for r in PiiRisk)
         raise MCPUserError(f"Invalid pii_risk values {invalid}. Valid values: {valid}")
-    return values
+    return parsed
 
 
 def resolve_test_type(short_name: str) -> str:
diff --git a/testgen/mcp/tools/hygiene_issues.py b/testgen/mcp/tools/hygiene_issues.py
index 7101587a..fedabb61 100644
--- a/testgen/mcp/tools/hygiene_issues.py
+++ b/testgen/mcp/tools/hygiene_issues.py
@@ -5,7 +5,7 @@
 from sqlalchemy.sql.functions import func
 
 from testgen.common.models import with_database_session
-from testgen.common.models.hygiene_issue import HygieneIssue, HygieneIssueType
+from testgen.common.models.hygiene_issue import Disposition, HygieneIssue, HygieneIssueType, IssueLikelihood, PiiRisk
 from testgen.common.models.job_execution import JobExecution
 from testgen.common.models.profiling_run import ProfilingRun
 from testgen.common.models.table_group import TableGroup
@@ -17,6 +17,7 @@
     format_page_footer,
     format_page_info,
     parse_disposition,
+    parse_impact_dimension,
     parse_issue_likelihood_list,
     parse_pii_risk_list,
     parse_quality_dimension,
@@ -40,8 +41,8 @@ def _redact_detail(row, view_pii_codes: set[str]) -> str:
 
 
 def _build_likelihood_clause(
-    issue_likelihood: list[str] | None,
-    pii_risk: list[str] | None,
+    issue_likelihood: list[IssueLikelihood] | None,
+    pii_risk: list[PiiRisk] | None,
 ) -> ColumnElement[bool] | None:
     """Construct the WHERE clause for the likelihood / pii_risk filter pair.
 
@@ -52,7 +53,7 @@ def _build_likelihood_clause(
     """
     likelihood_clause = HygieneIssueType.likelihood.in_(issue_likelihood) if issue_likelihood else None
     pii_clause = (
-        and_(HygieneIssueType.likelihood == "Potential PII", HygieneIssue.priority.in_(pii_risk))
+        and_(HygieneIssueType.likelihood == IssueLikelihood.POTENTIAL_PII, HygieneIssue.priority.in_(pii_risk))
         if pii_risk
         else None
     )
@@ -63,23 +64,22 @@ def _build_likelihood_clause(
     return or_(likelihood_clause, pii_clause)
 
 
-def _resolve_profile_run(
+def _resolve_profile_run_je_id(
     *,
     job_execution_id: str | None,
     table_group_id: str | None,
-) -> tuple[UUID, str]:
-    """Resolve the scope to a (profile_run_id, run_id_label) tuple.
+) -> UUID:
+    """Resolve the scope to the ``job_execution_id`` of a single profiling run.
 
-    Mutual-exclusion + at-least-one validation done by caller. Collapses missing
-    runs and inaccessible table groups into a single ``MCPResourceNotAccessible``.
+    Mutual-exclusion + at-least-one validation done by caller. Collapses missing runs
+    and inaccessible table groups into ``MCPResourceNotAccessible``.
     """
     if table_group_id:
         tg = resolve_table_group(table_group_id)
-        if tg.last_complete_profile_run_id is None:
+        je_uuid = ProfilingRun.get_latest_complete_je_id_for_table_group(tg.id)
+        if je_uuid is None:
             raise MCPUserError(f"No completed profiling runs found for table group `{table_group_id}`.")
-        run = ProfilingRun.get(tg.last_complete_profile_run_id)
-        run_id_label = str(run.job_execution_id) if run and run.job_execution_id else f"latest run for table group `{table_group_id}`"
-        return tg.last_complete_profile_run_id, run_id_label
+        return je_uuid
 
     job_uuid = parse_uuid(job_execution_id, "job_execution_id")
     run = ProfilingRun.get_by_id_or_job(job_uuid)
@@ -87,7 +87,7 @@ def _resolve_profile_run(
     tg = TableGroup.get(run.table_groups_id) if run else None
     if run is None or tg is None or not perms.has_access(tg.project_code):
         raise MCPResourceNotAccessible("Profiling run", job_execution_id)
-    return run.id, job_execution_id
+    return run.job_execution_id
 
 
 @with_database_session
@@ -98,6 +98,7 @@ def list_hygiene_issues(
     table_group_id: str | None = None,
     table_name: str | None = None,
     column_name: str | None = None,
+    impact_dimension: str | None = None,
     quality_dimension: str | None = None,
     disposition: str | None = "Confirmed",
     issue_likelihood: list[str] | None = None,
@@ -117,6 +118,8 @@ def list_hygiene_issues(
             Mutually exclusive with ``job_execution_id``.
         table_name: Filter by table name (exact match).
         column_name: Filter by column name (exact match).
+        impact_dimension: Filter by impact dimension ('Reliability', 'Conformance',
+            'Regularity', 'Usability').
         quality_dimension: Filter by data quality dimension ('Accuracy', 'Completeness',
             'Consistency', 'Recency', 'Timeliness', 'Uniqueness', 'Validity').
         disposition: Filter by disposition. Defaults to 'Confirmed'.
@@ -125,7 +128,7 @@ def list_hygiene_issues(
             Providing this filter auto-excludes PII issues; combine with ``pii_risk`` to include both.
         pii_risk: Filter by PII risk level. Values: 'High', 'Moderate'.
             Providing this filter auto-excludes regular issues.
-        issue_type: Filter by hygiene issue type (e.g. 'Personally Identifiable Information').
+        issue_type: Filter by hygiene issue type (e.g. 'Similar Values Match When Standardized').
             See ``testgen://hygiene-issue-types`` for the full list.
         limit: Maximum number of issues per page (default 50, max 200).
         page: Page number, starting from 1 (default 1).
@@ -138,7 +141,7 @@ def list_hygiene_issues(
     validate_limit(limit, 200)
 
     perms = get_project_permissions()
-    profile_run_id, run_id_label = _resolve_profile_run(
+    run_je_id = _resolve_profile_run_je_id(
         job_execution_id=job_execution_id,
         table_group_id=table_group_id,
     )
@@ -148,11 +151,13 @@ def list_hygiene_issues(
 
     clauses = [HygieneIssue.project_code.in_(perms.allowed_codes)]
     disposition_value = parse_disposition(disposition or "Confirmed")
-    clauses.append(func.coalesce(HygieneIssue.disposition, "Confirmed") == disposition_value)
+    clauses.append(func.coalesce(HygieneIssue.disposition, Disposition.CONFIRMED) == disposition_value)
     if table_name:
         clauses.append(HygieneIssue.table_name == table_name)
     if column_name:
         clauses.append(HygieneIssue.column_name == column_name)
+    if impact_dimension:
+        clauses.append(HygieneIssue.impact_dimension == parse_impact_dimension(impact_dimension))
     if quality_dimension:
         clauses.append(HygieneIssueType.dq_dimension == parse_quality_dimension(quality_dimension))
     if issue_type:
@@ -161,10 +166,10 @@ def list_hygiene_issues(
     if likelihood_clause is not None:
         clauses.append(likelihood_clause)
 
-    rows, total = HygieneIssue.list_for_run(profile_run_id, *clauses, page=page, limit=limit)
+    rows, total = HygieneIssue.list_for_run(run_je_id, *clauses, page=page, limit=limit)
 
     doc = MdDoc()
-    doc.heading(1, f"Hygiene Issues for run `{run_id_label}`")
+    doc.heading(1, f"Hygiene Issues for profiling run `{run_je_id}`")
     if not rows:
         doc.text("_No hygiene issues match the supplied filters._")
         return doc.render()
@@ -177,6 +182,8 @@ def list_hygiene_issues(
         doc.heading(2, f"[{r.priority or 'Unknown'}] {r.issue_type_name} on {location}")
         doc.field("Issue ID", r.id, code=True)
         doc.field("Issue Type", r.issue_type_name)
+        if r.impact_dimension:
+            doc.field("Impact Dimension", r.impact_dimension)
         if r.dq_dimension:
             doc.field("Quality Dimension", r.dq_dimension)
         doc.field("Disposition", format_disposition(r.disposition))
@@ -251,10 +258,10 @@ def get_hygiene_issue(*, issue_id: str) -> str:
     doc.field("Table", detail.table_name)
     if detail.column_name:
         doc.field("Column", detail.column_name)
-    if detail.dq_dimension:
-        doc.field("Quality Dimension", detail.dq_dimension)
     if detail.impact_dimension:
         doc.field("Impact Dimension", detail.impact_dimension)
+    if detail.dq_dimension:
+        doc.field("Quality Dimension", detail.dq_dimension)
     doc.field("Disposition", format_disposition(detail.disposition))
     if detail.priority:
         doc.field("Priority", detail.priority)
@@ -287,9 +294,8 @@ def get_hygiene_issue(*, issue_id: str) -> str:
             doc.field("Distinct values", detail.column_distinct_value_ct)
 
     doc.heading(2, "Profiling Run")
-    if detail.job_execution_id:
-        doc.field("Run", detail.job_execution_id, code=True)
-    doc.field("Run Date", detail.started_at)
+    doc.field("ID", detail.job_execution_id, code=True)
+    doc.field("Date", detail.started_at)
 
     return doc.render()
 
@@ -301,6 +307,7 @@ def search_hygiene_issues(
     project_code: str | None = None,
     table_group_id: str | None = None,
     issue_type: str | None = None,
+    impact_dimension: str | None = None,
     quality_dimension: str | None = None,
     disposition: str | None = "Confirmed",
     issue_likelihood: list[str] | None = None,
@@ -318,8 +325,10 @@ def search_hygiene_issues(
     Args:
         project_code: Scope to a specific project.
         table_group_id: UUID of a table group to scope to.
-        issue_type: Filter by hygiene issue type (e.g. 'Personally Identifiable Information').
+        issue_type: Filter by hygiene issue type (e.g. 'Similar Values Match When Standardized').
             See ``testgen://hygiene-issue-types`` for the full list.
+        impact_dimension: Filter by impact dimension ('Reliability', 'Conformance',
+            'Regularity', 'Usability').
         quality_dimension: Filter by data quality dimension ('Accuracy', 'Completeness',
             'Consistency', 'Recency', 'Timeliness', 'Uniqueness', 'Validity').
         disposition: Filter by disposition. Defaults to 'Confirmed'.
@@ -353,10 +362,12 @@ def search_hygiene_issues(
 
     clauses = [
         HygieneIssue.project_code.in_(project_codes),
-        func.coalesce(HygieneIssue.disposition, "Confirmed") == parse_disposition(disposition or "Confirmed"),
+        func.coalesce(HygieneIssue.disposition, Disposition.CONFIRMED) == parse_disposition(disposition or "Confirmed"),
     ]
     if table_group_uuid is not None:
         clauses.append(HygieneIssue.table_groups_id == table_group_uuid)
+    if impact_dimension:
+        clauses.append(HygieneIssue.impact_dimension == parse_impact_dimension(impact_dimension))
     if quality_dimension:
         clauses.append(HygieneIssueType.dq_dimension == parse_quality_dimension(quality_dimension))
     if type_id:
@@ -388,9 +399,10 @@ def search_hygiene_issues(
         doc.field("Issue ID", r.id, code=True)
         doc.field("Issue Type", r.issue_type_name)
         doc.field("Table Group", r.table_groups_name)
-        if r.job_execution_id:
-            doc.field("Profiling Run", r.job_execution_id, code=True)
+        doc.field("Profiling Run", r.job_execution_id, code=True)
         doc.field("Run Date", r.started_at)
+        if r.impact_dimension:
+            doc.field("Impact Dimension", r.impact_dimension)
         if r.dq_dimension:
             doc.field("Quality Dimension", r.dq_dimension)
         if r.priority:
diff --git a/testgen/mcp/tools/reference.py b/testgen/mcp/tools/reference.py
index 23217cdd..ca38634f 100644
--- a/testgen/mcp/tools/reference.py
+++ b/testgen/mcp/tools/reference.py
@@ -92,9 +92,9 @@ def hygiene_issue_types_resource() -> str:
     doc = MdDoc()
     doc.heading(1, "TestGen Hygiene Issue Types Reference")
     doc.table(
-        headers=["Issue Type", "Quality Dimension", "Impact Dimension", "Description", "Suggested Action"],
+        headers=["Issue Type", "Impact Dimension", "Quality Dimension", "Likelihood", "Description", "Suggested Action"],
         rows=[
-            [it.name, it.dq_dimension, it.impact_dimension, it.description, it.suggested_action]
+            [it.name, it.impact_dimension, it.dq_dimension, it.likelihood, it.description, it.suggested_action]
             for it in issue_types
         ],
     )
@@ -112,6 +112,8 @@ def glossary_resource() -> str:
 - **Project** — Top-level organizational unit.
 - **Connection** — Database connection configuration (host, credentials).
 - **Table Group** — A set of tables within a schema that are profiled and tested together.
+- **Profiling Run** — A scan of a table group that produces column-level statistics and detects hygiene issues.
+- **Hygiene Issue** — A potential data-quality concern detected by a profiling run (e.g. PII columns, non-standard blanks, mixed types).
 - **Test Suite** — A collection of test definitions scoped to a table group.
 - **Test Definition** — A configured test with parameters, thresholds, and target table/column.
 - **Test Run** — An execution of a test suite producing test results.
@@ -125,22 +127,41 @@ def glossary_resource() -> str:
 - **Error** — Test could not execute (e.g., missing table or permission issue).
 - **Log** — Informational result recorded for reference.
 
+## Hygiene Issue Likelihood
+
+How likely the issue is to indicate a real data quality problem.
+- **Definite** — Strong signal; almost always a real issue worth fixing.
+- **Likely** — Probable issue; review recommended.
+- **Possible** — Weaker signal; confirm against the data.
+
+PII issues use their own classification: hygiene issues that flag potential personally identifiable information are categorized by **PII Risk** (**High** or **Moderate**) instead of the likelihoods above.
+
 ## Disposition
 
-Disposition is a user-assigned review status for test results:
-- **Confirmed** (default) — Result is valid and counts toward scoring.
-- **Dismissed** — Result reviewed and dismissed (excluded from scoring).
-- **Muted** — Test was deactivated after this result (excluded from scoring).
+Disposition is a user-assigned review status for both test results and hygiene issues:
+- **Confirmed** (default) — Valid finding; counts toward scoring.
+- **Dismissed** — Reviewed and dismissed; excluded from scoring.
+- **Muted** — Acknowledged but suppressed; excluded from scoring. (For test results, this means the test was deactivated after the result.)
 
-## Data Quality Dimensions
+## Quality Dimensions
 
+What aspect of data quality the test or hygiene issue measures.
 - **Accuracy** — Data values are correct and reflect real-world truth.
 - **Completeness** — Required data is present (no unexpected NULLs or blanks).
 - **Consistency** — Data agrees across columns, tables, or systems.
-- **Timeliness** — Data is current and updated within expected windows.
+- **Recency** — Data values themselves reflect recent points in time (e.g. dates in the data fall within expected windows).
+- **Timeliness** — Data is updated on the expected schedule (no stale tables, expected refresh cadence).
 - **Uniqueness** — No unintended duplicates exist.
 - **Validity** — Data conforms to expected formats, ranges, or patterns.
 
+## Impact Dimensions
+
+What's at stake when the data has issues — the primary breakdown used by scorecards.
+- **Reliability** — Data is available and correct when needed.
+- **Conformance** — Data meets contracts, formats, and reference standards.
+- **Regularity** — Data arrives on schedule and stays structurally consistent.
+- **Usability** — Data is shaped so consumers can work with it efficiently.
+
 ## Test Scopes
 
 - **column** — Tests a single column (e.g., null rate, pattern match).
diff --git a/testgen/mcp/tools/test_definitions.py b/testgen/mcp/tools/test_definitions.py
index 3c520fe1..4365ee09 100644
--- a/testgen/mcp/tools/test_definitions.py
+++ b/testgen/mcp/tools/test_definitions.py
@@ -1,12 +1,14 @@
+from testgen.common.enums import ImpactDimension, QualityDimension
 from testgen.common.models import with_database_session
 from testgen.common.models.test_definition import TestDefinition, TestDefinitionNote, TestDefinitionSummary, TestType
 from testgen.common.models.test_result import TestResult
 from testgen.mcp.exceptions import MCPUserError
 from testgen.mcp.permissions import get_project_permissions, mcp_permission
 from testgen.mcp.tools.common import (
-    VALID_DQ_DIMENSIONS,
     format_page_footer,
     format_page_info,
+    parse_impact_dimension,
+    parse_quality_dimension,
     parse_uuid,
     resolve_test_type,
     validate_limit,
@@ -15,7 +17,6 @@
 from testgen.mcp.tools.markdown import MdDoc
 
 _VALID_SCOPES = {"column", "table", "referential", "custom"}
-_VALID_IMPACT_DIMENSIONS = {"Reliability", "Conformance", "Regularity", "Usability"}
 
 
 @with_database_session
@@ -300,20 +301,20 @@ def list_test_types(
     if scope and scope not in _VALID_SCOPES:
         valid = ", ".join(sorted(_VALID_SCOPES))
         raise MCPUserError(f"Invalid scope `{scope}`. Valid values: {valid}")
-    if impact_dimension and impact_dimension not in _VALID_IMPACT_DIMENSIONS:
-        valid = ", ".join(sorted(_VALID_IMPACT_DIMENSIONS))
-        raise MCPUserError(f"Invalid impact_dimension `{impact_dimension}`. Valid values: {valid}")
-    if quality_dimension and quality_dimension not in VALID_DQ_DIMENSIONS:
-        valid = ", ".join(sorted(VALID_DQ_DIMENSIONS))
-        raise MCPUserError(f"Invalid quality_dimension `{quality_dimension}`. Valid values: {valid}")
+    impact_dimension_enum: ImpactDimension | None = (
+        parse_impact_dimension(impact_dimension) if impact_dimension else None
+    )
+    quality_dimension_enum: QualityDimension | None = (
+        parse_quality_dimension(quality_dimension) if quality_dimension else None
+    )
 
     clauses = [TestType.active == "Y"]
     if scope:
         clauses.append(TestType.test_scope == scope)
-    if impact_dimension:
-        clauses.append(TestType.impact_dimension == impact_dimension)
-    if quality_dimension:
-        clauses.append(TestType.dq_dimension == quality_dimension)
+    if impact_dimension_enum is not None:
+        clauses.append(TestType.impact_dimension == impact_dimension_enum)
+    if quality_dimension_enum is not None:
+        clauses.append(TestType.dq_dimension == quality_dimension_enum)
 
     test_types = TestType.select_where(*clauses)
 
diff --git a/tests/unit/mcp/test_model_hygiene_issue.py b/tests/unit/mcp/test_model_hygiene_issue.py
new file mode 100644
index 00000000..bf90e2b6
--- /dev/null
+++ b/tests/unit/mcp/test_model_hygiene_issue.py
@@ -0,0 +1,362 @@
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+from sqlalchemy.dialects import postgresql
+
+from testgen.common.models.hygiene_issue import (
+    HygieneIssue,
+    HygieneIssueDetail,
+    HygieneIssueListRow,
+    HygieneIssueSearchRow,
+    HygieneIssueType,
+)
+
+
+@pytest.fixture
+def session_mock():
+    with (
+        patch("testgen.common.models.hygiene_issue.get_current_session") as hi_mock,
+        patch("testgen.common.models.entity.get_current_session") as entity_mock,
+    ):
+        entity_mock.return_value = hi_mock.return_value
+        yield hi_mock.return_value
+
+
+def _compiled_sql(captured_query, literal_binds: bool = False) -> str:
+    compile_kwargs = {"literal_binds": True} if literal_binds else {}
+    return str(captured_query.compile(dialect=postgresql.dialect(), compile_kwargs=compile_kwargs))
+
+
+def _all_compiled_sql(session_mock) -> str:
+    """Concatenate all queries seen by session.scalar + session.execute. ``_paginate`` issues
+    both a count and a fetch; the WHERE/JOIN clauses live in both."""
+    queries = [call[0][0] for call in session_mock.scalar.call_args_list]
+    queries += [call[0][0] for call in session_mock.execute.call_args_list]
+    return "\n".join(_compiled_sql(q) for q in queries)
+
+
+def _list_row_mapping(**overrides):
+    base = {
+        "id": uuid4(),
+        "project_code": "demo",
+        "issue_type_name": "Non-Standard Blank Values",
+        "schema_name": "demo",
+        "table_name": "orders",
+        "column_name": "frame_size",
+        "impact_dimension": "Usability",
+        "dq_dimension": "Completeness",
+        "disposition": "Confirmed",
+        "priority": "Definite",
+        "detail": "…",
+        "detail_redactable": False,
+        "pii_flag": None,
+    }
+    base.update(overrides)
+    return base
+
+
+def _search_row_mapping(**overrides):
+    base = _list_row_mapping()
+    base.update({
+        "table_groups_name": "default",
+        "job_execution_id": uuid4(),
+        "started_at": datetime(2026, 5, 1),
+    })
+    base.update(overrides)
+    return base
+
+
+def _detail_row_mapping(**overrides):
+    base = _list_row_mapping()
+    base.update({
+        "type_description": "type description",
+        "suggested_action": "suggested action",
+        "job_execution_id": uuid4(),
+        "started_at": datetime(2026, 5, 1),
+        "column_general_type": "A",
+        "column_db_data_type": "varchar(50)",
+        "column_record_ct": 100,
+        "column_null_value_ct": 5,
+        "column_distinct_value_ct": 50,
+    })
+    base.update(overrides)
+    return base
+
+
+def _stub_paginate(session_mock, *, total=0, rows=()):
+    session_mock.scalar.return_value = total
+    session_mock.execute.return_value.mappings.return_value.all.return_value = list(rows)
+
+
+# ---------------------------------------------------------------------------
+# HygieneIssue.list_for_run
+# ---------------------------------------------------------------------------
+
+
+def test_list_for_run_filters_by_je_id_not_run_pk(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.list_for_run(uuid4())
+
+    sql = _all_compiled_sql(session_mock)
+    assert "profiling_runs.job_execution_id =" in sql
+    # The legacy run PK must NOT be the filter:
+    assert "profile_anomaly_results.profile_run_id =" not in sql
+
+
+def test_list_for_run_joins_profile_anomaly_types_inner(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.list_for_run(uuid4())
+
+    sql = _all_compiled_sql(session_mock)
+    assert "JOIN profile_anomaly_types" in sql
+
+
+def test_list_for_run_joins_profile_results_outer(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.list_for_run(uuid4())
+
+    sql = _all_compiled_sql(session_mock)
+    assert "LEFT OUTER JOIN profile_results" in sql
+    # Composite join condition guards against profile-result drift:
+    assert "profile_results.schema_name = profile_anomaly_results.schema_name" in sql
+    assert "profile_results.table_name = profile_anomaly_results.table_name" in sql
+    assert "profile_results.column_name = profile_anomaly_results.column_name" in sql
+
+
+def test_list_for_run_orders_by_priority_then_table_then_column_then_id(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.list_for_run(uuid4())
+
+    sql = _all_compiled_sql(session_mock)
+    # Priority CASE comes first; cls.id last for stable pagination.
+    assert "CASE" in sql
+    assert "ORDER BY" in sql
+    assert "profile_anomaly_results.table_name" in sql
+    assert "profile_anomaly_results.column_name" in sql
+    assert "profile_anomaly_results.id" in sql
+
+
+def test_list_for_run_caller_clauses_appended_to_where(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.list_for_run(
+        uuid4(),
+        HygieneIssue.table_name == "orders",
+        HygieneIssue.project_code.in_(["demo"]),
+    )
+
+    sql = _all_compiled_sql(session_mock)
+    assert "profile_anomaly_results.table_name =" in sql
+    assert "profile_anomaly_results.project_code IN" in sql
+
+
+def test_list_for_run_coalesces_disposition(session_mock):
+    """The SELECT list coalesces disposition so NULL rows render as the default."""
+    _stub_paginate(session_mock)
+
+    HygieneIssue.list_for_run(uuid4())
+
+    sql = _all_compiled_sql(session_mock)
+    assert "coalesce(profile_anomaly_results.disposition" in sql
+
+
+def test_list_for_run_returns_paginated_tuple(session_mock):
+    rows = [_list_row_mapping(), _list_row_mapping()]
+    _stub_paginate(session_mock, total=42, rows=rows)
+
+    result_rows, total = HygieneIssue.list_for_run(uuid4())
+
+    assert total == 42
+    assert len(result_rows) == 2
+    assert all(isinstance(r, HygieneIssueListRow) for r in result_rows)
+
+
+# ---------------------------------------------------------------------------
+# HygieneIssue.search
+# ---------------------------------------------------------------------------
+
+
+def test_search_joins_table_group_inner(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.search()
+
+    sql = _all_compiled_sql(session_mock)
+    assert "JOIN table_groups" in sql
+
+
+def test_search_joins_job_execution_outer(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.search()
+
+    sql = _all_compiled_sql(session_mock)
+    assert "LEFT OUTER JOIN job_executions" in sql
+
+
+def test_search_joins_profile_results_outer(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.search()
+
+    sql = _all_compiled_sql(session_mock)
+    assert "LEFT OUTER JOIN profile_results" in sql
+
+
+def test_search_orders_by_started_at_desc_then_priority_then_id(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.search()
+
+    sql = _all_compiled_sql(session_mock)
+    assert "ORDER BY" in sql
+    assert "job_executions.started_at DESC" in sql
+    assert "profile_anomaly_results.id" in sql
+
+
+def test_search_caller_clauses_propagate(session_mock):
+    _stub_paginate(session_mock)
+
+    HygieneIssue.search(
+        HygieneIssue.project_code.in_(["demo"]),
+        HygieneIssue.table_name == "orders",
+    )
+
+    sql = _all_compiled_sql(session_mock)
+    assert "profile_anomaly_results.project_code IN" in sql
+    assert "profile_anomaly_results.table_name =" in sql
+
+
+def test_search_returns_paginated_tuple(session_mock):
+    rows = [_search_row_mapping()]
+    _stub_paginate(session_mock, total=7, rows=rows)
+
+    result_rows, total = HygieneIssue.search()
+
+    assert total == 7
+    assert len(result_rows) == 1
+    assert isinstance(result_rows[0], HygieneIssueSearchRow)
+
+
+# ---------------------------------------------------------------------------
+# HygieneIssue.get_with_context
+# ---------------------------------------------------------------------------
+
+
+def test_get_with_context_filters_by_id_and_extra_clauses(session_mock):
+    session_mock.execute.return_value.mappings.return_value.first.return_value = None
+
+    HygieneIssue.get_with_context(uuid4(), HygieneIssue.project_code.in_(["demo"]))
+
+    sql = _compiled_sql(session_mock.execute.call_args[0][0])
+    assert "profile_anomaly_results.id =" in sql
+    assert "profile_anomaly_results.project_code IN" in sql
+
+
+def test_get_with_context_outer_joins_profile_results(session_mock):
+    session_mock.execute.return_value.mappings.return_value.first.return_value = None
+
+    HygieneIssue.get_with_context(uuid4())
+
+    sql = _compiled_sql(session_mock.execute.call_args[0][0])
+    assert "LEFT OUTER JOIN profile_results" in sql
+
+
+def test_get_with_context_outer_joins_job_execution(session_mock):
+    session_mock.execute.return_value.mappings.return_value.first.return_value = None
+
+    HygieneIssue.get_with_context(uuid4())
+
+    sql = _compiled_sql(session_mock.execute.call_args[0][0])
+    assert "LEFT OUTER JOIN job_executions" in sql
+
+
+def test_get_with_context_returns_none_when_no_row(session_mock):
+    session_mock.execute.return_value.mappings.return_value.first.return_value = None
+
+    result = HygieneIssue.get_with_context(uuid4())
+
+    assert result is None
+
+
+def test_get_with_context_returns_dataclass_when_row_present(session_mock):
+    session_mock.execute.return_value.mappings.return_value.first.return_value = _detail_row_mapping()
+
+    result = HygieneIssue.get_with_context(uuid4())
+
+    assert isinstance(result, HygieneIssueDetail)
+    assert result.issue_type_name == "Non-Standard Blank Values"
+
+
+# ---------------------------------------------------------------------------
+# HygieneIssue.update_disposition
+# ---------------------------------------------------------------------------
+
+
+def test_update_disposition_compiles_to_update_with_id_and_clauses(session_mock):
+    session_mock.execute.return_value.rowcount = 1
+
+    HygieneIssue.update_disposition(
+        uuid4(),
+        "Dismissed",
+        HygieneIssue.project_code.in_(["demo"]),
+    )
+
+    sql = _compiled_sql(session_mock.execute.call_args[0][0])
+    assert "UPDATE profile_anomaly_results" in sql
+    assert "SET disposition" in sql
+    assert "profile_anomaly_results.id =" in sql
+    # Project-scope clause must be in the WHERE — write-side authorization.
+    assert "profile_anomaly_results.project_code IN" in sql
+
+
+def test_update_disposition_returns_true_when_rowcount_positive(session_mock):
+    session_mock.execute.return_value.rowcount = 1
+
+    assert HygieneIssue.update_disposition(uuid4(), "Dismissed") is True
+
+
+def test_update_disposition_returns_false_when_rowcount_zero(session_mock):
+    session_mock.execute.return_value.rowcount = 0
+
+    assert HygieneIssue.update_disposition(uuid4(), "Dismissed") is False
+
+
+# ---------------------------------------------------------------------------
+# HygieneIssueType.select_where
+# ---------------------------------------------------------------------------
+
+
+def test_hygiene_issue_type_select_where_no_clauses_returns_all(session_mock):
+    session_mock.scalars.return_value = [MagicMock(), MagicMock()]
+
+    result = HygieneIssueType.select_where()
+
+    assert len(result) == 2
+    sql = _compiled_sql(session_mock.scalars.call_args[0][0])
+    assert "SELECT" in sql.upper()
+    assert "profile_anomaly_types" in sql
+
+
+def test_hygiene_issue_type_select_where_with_order_by(session_mock):
+    session_mock.scalars.return_value = []
+
+    HygieneIssueType.select_where(order_by=(HygieneIssueType.name,))
+
+    sql = _compiled_sql(session_mock.scalars.call_args[0][0])
+    assert "ORDER BY profile_anomaly_types.anomaly_name" in sql
+
+
+def test_hygiene_issue_type_select_where_filter_clause(session_mock):
+    session_mock.scalars.return_value = []
+
+    HygieneIssueType.select_where(HygieneIssueType.name == "Some Type")
+
+    sql = _compiled_sql(session_mock.scalars.call_args[0][0])
+    assert "profile_anomaly_types.anomaly_name =" in sql
diff --git a/tests/unit/mcp/test_model_profiling_run.py b/tests/unit/mcp/test_model_profiling_run.py
new file mode 100644
index 00000000..94d88b1b
--- /dev/null
+++ b/tests/unit/mcp/test_model_profiling_run.py
@@ -0,0 +1,65 @@
+from unittest.mock import patch
+from uuid import uuid4
+
+import pytest
+from sqlalchemy.dialects import postgresql
+
+from testgen.common.models.profiling_run import ProfilingRun
+
+
+@pytest.fixture
+def session_mock():
+    with patch("testgen.common.models.profiling_run.get_current_session") as mock:
+        yield mock.return_value
+
+
+def _compiled_sql(captured_query) -> str:
+    return str(captured_query.compile(dialect=postgresql.dialect()))
+
+
+def test_get_latest_complete_je_id_returns_je_id_when_present(session_mock):
+    je_id = uuid4()
+    session_mock.scalar.return_value = je_id
+
+    result = ProfilingRun.get_latest_complete_je_id_for_table_group(uuid4())
+
+    assert result == je_id
+
+
+def test_get_latest_complete_je_id_returns_none_when_no_runs(session_mock):
+    session_mock.scalar.return_value = None
+
+    result = ProfilingRun.get_latest_complete_je_id_for_table_group(uuid4())
+
+    assert result is None
+
+
+def test_get_latest_complete_je_id_filters_to_completed(session_mock):
+    session_mock.scalar.return_value = None
+
+    ProfilingRun.get_latest_complete_je_id_for_table_group(uuid4())
+
+    sql = _compiled_sql(session_mock.scalar.call_args[0][0])
+    assert "job_executions.status =" in sql
+
+
+def test_get_latest_complete_je_id_orders_desc_limit_1(session_mock):
+    session_mock.scalar.return_value = None
+
+    ProfilingRun.get_latest_complete_je_id_for_table_group(uuid4())
+
+    sql = _compiled_sql(session_mock.scalar.call_args[0][0])
+    assert "ORDER BY job_executions.started_at DESC" in sql
+    assert "LIMIT" in sql
+
+
+def test_get_latest_complete_je_id_selects_je_id_not_run_pk(session_mock):
+    """Pin the docstring contract — does NOT read ``table_groups.last_complete_profile_run_id``,
+    selects the JE id directly from ``profiling_runs``."""
+    session_mock.scalar.return_value = None
+
+    ProfilingRun.get_latest_complete_je_id_for_table_group(uuid4())
+
+    sql = _compiled_sql(session_mock.scalar.call_args[0][0])
+    assert "SELECT profiling_runs.job_execution_id" in sql
+    assert "table_groups.last_complete_profile_run_id" not in sql
diff --git a/tests/unit/mcp/test_prompts_workflows.py b/tests/unit/mcp/test_prompts_workflows.py
new file mode 100644
index 00000000..ac52520a
--- /dev/null
+++ b/tests/unit/mcp/test_prompts_workflows.py
@@ -0,0 +1,55 @@
+from testgen.mcp.prompts.workflows import hygiene_triage
+
+
+def test_hygiene_triage_with_table_group_id_interpolates_id():
+    tg_id = "abc-123-def"
+    result = hygiene_triage(table_group_id=tg_id)
+
+    assert tg_id in result
+    assert f"table_group_id='{tg_id}'" in result
+    # Without the inventory step:
+    assert "get_data_inventory" not in result
+    assert "Pick a table group" not in result
+    assert "Focus on table group" in result
+
+
+def test_hygiene_triage_without_table_group_id_uses_placeholder():
+    result = hygiene_triage()
+
+    assert "Pick a table group" in result
+    assert "get_data_inventory" in result
+    # With the discovery step we expect the placeholder form for the call examples:
+    assert "table_group_id='...'" in result
+
+
+def test_hygiene_triage_steps_numbered_consecutively_with_id():
+    result = hygiene_triage(table_group_id="abc")
+
+    # With table_group_id supplied, the inventory step is omitted → 5 steps numbered 1-5.
+    for n in (1, 2, 3, 4, 5):
+        assert f"{n}. " in result
+    assert "6. " not in result
+
+
+def test_hygiene_triage_steps_numbered_consecutively_without_id():
+    result = hygiene_triage()
+
+    # Without table_group_id, the inventory step is included → 6 steps numbered 1-6.
+    for n in (1, 2, 3, 4, 5, 6):
+        assert f"{n}. " in result
+    assert "7. " not in result
+
+
+def test_hygiene_triage_offers_all_three_dispositions():
+    """The prompt should not push 'Dismissed' as the action — review feedback."""
+    result = hygiene_triage()
+
+    assert "Confirmed" in result
+    assert "Dismissed" in result
+    assert "Muted" in result
+    assert "update_hygiene_issue" in result
+
+
+def test_hygiene_triage_references_resource_for_unfamiliar_types():
+    result = hygiene_triage()
+    assert "testgen://hygiene-issue-types" in result
diff --git a/tests/unit/mcp/test_tools_common.py b/tests/unit/mcp/test_tools_common.py
index 8aaf8865..0e5e1b03 100644
--- a/tests/unit/mcp/test_tools_common.py
+++ b/tests/unit/mcp/test_tools_common.py
@@ -1,10 +1,25 @@
+from unittest.mock import MagicMock, patch
 from uuid import UUID
 
 import pytest
 
+from testgen.common.enums import ImpactDimension, QualityDimension
+from testgen.common.models.hygiene_issue import Disposition, IssueLikelihood, PiiRisk
 from testgen.common.models.test_result import TestResultStatus
 from testgen.mcp.exceptions import MCPUserError
-from testgen.mcp.tools.common import parse_result_status, parse_uuid, validate_limit, validate_page
+from testgen.mcp.tools.common import (
+    format_disposition,
+    parse_disposition,
+    parse_impact_dimension,
+    parse_issue_likelihood_list,
+    parse_pii_risk_list,
+    parse_quality_dimension,
+    parse_result_status,
+    parse_uuid,
+    resolve_issue_type,
+    validate_limit,
+    validate_page,
+)
 
 # --- parse_uuid ---
 
@@ -88,3 +103,179 @@ def test_validate_limit_rejects_out_of_range(bad):
 def test_validate_limit_message_includes_max():
     with pytest.raises(MCPUserError, match="between 1 and 200"):
         validate_limit(0, 200)
+
+
+# --- parse_disposition / format_disposition ---
+
+
+@pytest.mark.parametrize(
+    "user_label,expected",
+    [
+        ("Confirmed", Disposition.CONFIRMED),
+        ("Dismissed", Disposition.DISMISSED),
+        ("Muted", Disposition.INACTIVE),
+    ],
+)
+def test_parse_disposition_user_labels_to_db_value(user_label, expected):
+    assert parse_disposition(user_label) is expected
+
+
+def test_parse_disposition_rejects_db_value_inactive():
+    """``Inactive`` is the DB value, not user-facing — accepting it would create two
+    spellings for the same disposition."""
+    with pytest.raises(MCPUserError, match="Invalid disposition"):
+        parse_disposition("Inactive")
+
+
+def test_parse_disposition_rejects_unknown_lists_valid_values():
+    with pytest.raises(MCPUserError, match="Valid values:") as exc_info:
+        parse_disposition("Bogus")
+    msg = str(exc_info.value)
+    assert "Confirmed" in msg
+    assert "Dismissed" in msg
+    assert "Muted" in msg
+
+
+def test_parse_disposition_case_sensitive():
+    with pytest.raises(MCPUserError):
+        parse_disposition("confirmed")
+
+
+@pytest.mark.parametrize(
+    "db_value,expected",
+    [
+        (Disposition.CONFIRMED, "Confirmed"),
+        (Disposition.DISMISSED, "Dismissed"),
+        (Disposition.INACTIVE, "Muted"),
+    ],
+)
+def test_format_disposition_db_to_user_label(db_value, expected):
+    assert format_disposition(db_value) == expected
+
+
+def test_format_disposition_accepts_string_form():
+    """Coalesce on the column produces a plain string at runtime — both forms must work."""
+    assert format_disposition("Inactive") == "Muted"
+    assert format_disposition("Confirmed") == "Confirmed"
+
+
+def test_format_disposition_unknown_falls_through_to_string():
+    assert format_disposition("WhoKnows") == "WhoKnows"
+
+
+# --- parse_impact_dimension ---
+
+
+@pytest.mark.parametrize("value", [d.value for d in ImpactDimension])
+def test_parse_impact_dimension_valid(value):
+    assert parse_impact_dimension(value) == ImpactDimension(value)
+
+
+def test_parse_impact_dimension_invalid_lists_valid_values():
+    with pytest.raises(MCPUserError, match="Invalid impact_dimension") as exc_info:
+        parse_impact_dimension("BadDim")
+    msg = str(exc_info.value)
+    for d in ImpactDimension:
+        assert d.value in msg
+
+
+# --- parse_quality_dimension ---
+
+
+@pytest.mark.parametrize("value", [d.value for d in QualityDimension])
+def test_parse_quality_dimension_valid(value):
+    assert parse_quality_dimension(value) == QualityDimension(value)
+
+
+def test_parse_quality_dimension_includes_recency():
+    """Recency was added during the TG-1029 enum migration; pin it as a valid value."""
+    assert parse_quality_dimension("Recency") == QualityDimension.RECENCY
+
+
+def test_parse_quality_dimension_invalid_lists_valid_values():
+    with pytest.raises(MCPUserError, match="Invalid quality_dimension") as exc_info:
+        parse_quality_dimension("BadDim")
+    msg = str(exc_info.value)
+    for d in QualityDimension:
+        assert d.value in msg
+
+
+# --- parse_issue_likelihood_list ---
+
+
+def test_parse_issue_likelihood_list_accepts_three_filterable_values():
+    result = parse_issue_likelihood_list(["Definite", "Likely", "Possible"])
+    assert result == [IssueLikelihood.DEFINITE, IssueLikelihood.LIKELY, IssueLikelihood.POSSIBLE]
+
+
+def test_parse_issue_likelihood_list_rejects_potential_pii():
+    """``Potential PII`` is a valid IssueLikelihood enum value but NOT a valid filter input —
+    PII issues are filtered separately via ``pii_risk``. Locking this prevents a future
+    'fix' that allows the full enum and breaks the auto-exclude API contract."""
+    with pytest.raises(MCPUserError, match="Invalid issue_likelihood"):
+        parse_issue_likelihood_list(["Potential PII"])
+
+
+def test_parse_issue_likelihood_list_invalid_lists_valid_values_excluding_pii():
+    with pytest.raises(MCPUserError, match="Valid values:") as exc_info:
+        parse_issue_likelihood_list(["Bogus"])
+    msg = str(exc_info.value)
+    assert "Definite" in msg
+    assert "Likely" in msg
+    assert "Possible" in msg
+    assert "Potential PII" not in msg
+
+
+def test_parse_issue_likelihood_list_collects_all_invalid():
+    with pytest.raises(MCPUserError) as exc_info:
+        parse_issue_likelihood_list(["Definite", "Bogus", "Other"])
+    msg = str(exc_info.value)
+    assert "Bogus" in msg
+    assert "Other" in msg
+
+
+def test_parse_issue_likelihood_list_empty_returns_empty():
+    assert parse_issue_likelihood_list([]) == []
+
+
+# --- parse_pii_risk_list ---
+
+
+def test_parse_pii_risk_list_accepts_high_moderate():
+    assert parse_pii_risk_list(["High", "Moderate"]) == [PiiRisk.HIGH, PiiRisk.MODERATE]
+
+
+def test_parse_pii_risk_list_rejects_low():
+    with pytest.raises(MCPUserError, match="Invalid pii_risk"):
+        parse_pii_risk_list(["Low"])
+
+
+def test_parse_pii_risk_list_collects_all_invalid():
+    with pytest.raises(MCPUserError) as exc_info:
+        parse_pii_risk_list(["High", "Bogus", "Wrong"])
+    msg = str(exc_info.value)
+    assert "Bogus" in msg
+    assert "Wrong" in msg
+
+
+# --- resolve_issue_type ---
+
+
+def test_resolve_issue_type_found_returns_id():
+    fake = MagicMock()
+    fake.id = "1015"
+    with patch(
+        "testgen.mcp.tools.common.HygieneIssueType.select_where", return_value=[fake]
+    ) as select_where:
+        result = resolve_issue_type("Personally Identifiable Information")
+    assert result == "1015"
+    assert select_where.call_count == 1
+
+
+def test_resolve_issue_type_not_found_raises_with_resource_hint():
+    with patch(
+        "testgen.mcp.tools.common.HygieneIssueType.select_where", return_value=[]
+    ):
+        with pytest.raises(MCPUserError, match="Unknown hygiene issue type") as exc_info:
+            resolve_issue_type("Made-Up Type")
+    assert "testgen://hygiene-issue-types" in str(exc_info.value)
diff --git a/tests/unit/mcp/test_tools_hygiene_issues.py b/tests/unit/mcp/test_tools_hygiene_issues.py
new file mode 100644
index 00000000..0a81ce3f
--- /dev/null
+++ b/tests/unit/mcp/test_tools_hygiene_issues.py
@@ -0,0 +1,907 @@
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+from sqlalchemy.dialects import postgresql
+
+from testgen.common.models.hygiene_issue import Disposition, HygieneIssue, IssueLikelihood
+from testgen.common.models.profiling_run import ProfilingRun
+from testgen.common.pii_masking import PII_REDACTED
+from testgen.mcp.exceptions import MCPResourceNotAccessible, MCPUserError
+from testgen.mcp.permissions import ProjectPermissions, _mcp_project_permissions
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _list_row(**overrides):
+    base = {
+        "id": uuid4(),
+        "project_code": "demo",
+        "issue_type_name": "Non-Standard Blank Values",
+        "schema_name": "demo",
+        "table_name": "orders",
+        "column_name": "frame_size",
+        "impact_dimension": "Usability",
+        "dq_dimension": "Completeness",
+        "disposition": Disposition.CONFIRMED,
+        "priority": "Definite",
+        "detail": "Dummy: 5",
+        "detail_redactable": False,
+        "pii_flag": None,
+    }
+    base.update(overrides)
+    return MagicMock(**base)
+
+
+def _search_row(**overrides):
+    base = {
+        "id": uuid4(),
+        "project_code": "demo",
+        "issue_type_name": "Non-Standard Blank Values",
+        "table_groups_name": "default",
+        "job_execution_id": uuid4(),
+        "started_at": datetime(2026, 5, 1),
+        "schema_name": "demo",
+        "table_name": "orders",
+        "column_name": "frame_size",
+        "impact_dimension": "Usability",
+        "dq_dimension": "Completeness",
+        "disposition": Disposition.CONFIRMED,
+        "priority": "Definite",
+        "detail": "Dummy: 5",
+        "detail_redactable": False,
+        "pii_flag": None,
+    }
+    base.update(overrides)
+    return MagicMock(**base)
+
+
+def _detail_row(**overrides):
+    base = {
+        "id": uuid4(),
+        "project_code": "demo",
+        "issue_type_name": "Non-Standard Blank Values",
+        "type_description": "Description body.",
+        "suggested_action": "Suggested action body.",
+        "schema_name": "demo",
+        "table_name": "orders",
+        "column_name": "frame_size",
+        "dq_dimension": "Completeness",
+        "impact_dimension": "Usability",
+        "disposition": Disposition.CONFIRMED,
+        "priority": "Definite",
+        "detail": "Dummy: 5",
+        "detail_redactable": False,
+        "pii_flag": None,
+        "job_execution_id": uuid4(),
+        "started_at": datetime(2026, 5, 1),
+        "column_general_type": "A",
+        "column_db_data_type": "varchar(50)",
+        "column_record_ct": 100,
+        "column_null_value_ct": 5,
+        "column_distinct_value_ct": 50,
+    }
+    base.update(overrides)
+    return MagicMock(**base)
+
+
+def _mock_table_group(project_code="demo"):
+    tg = MagicMock()
+    tg.id = uuid4()
+    tg.project_code = project_code
+    return tg
+
+
+def _mock_run(project_code="demo"):
+    run = MagicMock()
+    run.id = uuid4()
+    run.job_execution_id = uuid4()
+    run.table_groups_id = uuid4()
+    run.project_code = project_code
+    return run
+
+
+def _compiled_clauses(call_args) -> str:
+    """Compile every positional arg of a model-method call to Postgres SQL."""
+    pieces = []
+    for arg in call_args.args:
+        try:
+            pieces.append(str(arg.compile(dialect=postgresql.dialect())))
+        except (AttributeError, TypeError):
+            pieces.append(str(arg))
+    return "\n".join(pieces)
+
+
+def _set_full_perms(permission="view"):
+    """Manually set a permission context. Used for helpers that read the contextvar
+    directly without going through @mcp_permission (e.g. _resolve_profile_run_je_id)."""
+    perms = MagicMock(spec=ProjectPermissions)
+    perms.memberships = {"demo": "role_a"}
+    perms.permission = permission
+    perms.username = "test_user"
+    perms.allowed_codes = ["demo"]
+    perms.codes_allowed_to.return_value = ["demo"]
+    perms.has_access.side_effect = lambda code: code in ["demo"]
+    return _mcp_project_permissions.set(perms)
+
+
+# ---------------------------------------------------------------------------
+# _redact_detail
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "redactable,pii_flag,project_in_view_pii,should_redact",
+    [
+        (True, "B/NAME/Individual", False, True),
+        (True, "B/NAME/Individual", True, False),
+        (True, None, False, False),
+        (False, "B/NAME/Individual", False, False),
+        (False, None, True, False),
+        (None, None, False, False),
+    ],
+)
+def test_redact_detail_matrix(redactable, pii_flag, project_in_view_pii, should_redact):
+    from testgen.mcp.tools.hygiene_issues import _redact_detail
+
+    row = _list_row(
+        detail_redactable=redactable,
+        pii_flag=pii_flag,
+        detail="ssn=123-45-6789",
+        project_code="demo",
+    )
+    view_pii_codes = {"demo"} if project_in_view_pii else set()
+    result = _redact_detail(row, view_pii_codes)
+
+    assert (result == PII_REDACTED) is should_redact
+    if not should_redact:
+        assert result == "ssn=123-45-6789"
+
+
+def test_redact_detail_view_pii_per_project():
+    from testgen.mcp.tools.hygiene_issues import _redact_detail
+
+    row = _list_row(detail_redactable=True, pii_flag="B/NAME", detail="raw", project_code="proj_b")
+    assert _redact_detail(row, {"proj_a"}) == PII_REDACTED
+
+
+# ---------------------------------------------------------------------------
+# _build_likelihood_clause
+# ---------------------------------------------------------------------------
+
+
+def _compile_clause(clause) -> str:
+    return str(clause.compile(dialect=postgresql.dialect()))
+
+
+def test_build_likelihood_clause_neither_returns_none():
+    from testgen.mcp.tools.hygiene_issues import _build_likelihood_clause
+
+    assert _build_likelihood_clause(None, None) is None
+
+
+def test_build_likelihood_clause_likelihood_only_excludes_pii():
+    from testgen.mcp.tools.hygiene_issues import _build_likelihood_clause
+
+    sql = _compile_clause(_build_likelihood_clause([IssueLikelihood.DEFINITE], None))
+    assert "profile_anomaly_types.issue_likelihood IN" in sql
+    assert "Potential PII" not in sql
+
+
+def test_build_likelihood_clause_pii_only_uses_priority_hybrid():
+    from testgen.mcp.tools.hygiene_issues import _build_likelihood_clause
+
+    sql = _compile_clause(_build_likelihood_clause(None, ["High"]))
+    assert "profile_anomaly_types.issue_likelihood =" in sql
+    # priority hybrid expands to a CASE expression in SQL:
+    assert "CASE" in sql
+
+
+def test_build_likelihood_clause_both_returns_or_combination():
+    from testgen.mcp.tools.hygiene_issues import _build_likelihood_clause
+
+    sql = _compile_clause(_build_likelihood_clause([IssueLikelihood.DEFINITE], ["High"]))
+    assert " OR " in sql
+    assert "issue_likelihood IN" in sql
+    assert "issue_likelihood =" in sql
+
+
+# ---------------------------------------------------------------------------
+# _resolve_profile_run_je_id
+# ---------------------------------------------------------------------------
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+def test_resolve_je_id_table_group_branch(mock_latest, mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import _resolve_profile_run_je_id
+
+    tg = _mock_table_group()
+    mock_resolve_tg.return_value = tg
+    expected_je = uuid4()
+    mock_latest.return_value = expected_je
+
+    token = _set_full_perms()
+    try:
+        result = _resolve_profile_run_je_id(job_execution_id=None, table_group_id=str(uuid4()))
+    finally:
+        _mcp_project_permissions.reset(token)
+
+    assert result == expected_je
+    mock_latest.assert_called_once_with(tg.id)
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+def test_resolve_je_id_table_group_no_completed_runs(mock_latest, mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import _resolve_profile_run_je_id
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = None
+
+    token = _set_full_perms()
+    try:
+        with pytest.raises(MCPUserError, match="No completed profiling runs"):
+            _resolve_profile_run_je_id(job_execution_id=None, table_group_id=str(uuid4()))
+    finally:
+        _mcp_project_permissions.reset(token)
+
+
+@patch("testgen.mcp.tools.hygiene_issues.TableGroup")
+@patch.object(ProfilingRun, "get_by_id_or_job")
+def test_resolve_je_id_je_branch_unknown_run(mock_get, mock_tg_cls, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import _resolve_profile_run_je_id
+
+    mock_get.return_value = None
+
+    token = _set_full_perms()
+    try:
+        with pytest.raises(MCPResourceNotAccessible):
+            _resolve_profile_run_je_id(job_execution_id=str(uuid4()), table_group_id=None)
+    finally:
+        _mcp_project_permissions.reset(token)
+
+
+@patch("testgen.mcp.tools.hygiene_issues.TableGroup")
+@patch.object(ProfilingRun, "get_by_id_or_job")
+def test_resolve_je_id_je_branch_inaccessible_tg(mock_get, mock_tg_cls, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import _resolve_profile_run_je_id
+
+    mock_get.return_value = _mock_run()
+    mock_tg_cls.get.return_value = _mock_table_group(project_code="forbidden")
+
+    token = _set_full_perms()  # only has access to "demo", not "forbidden"
+    try:
+        with pytest.raises(MCPResourceNotAccessible):
+            _resolve_profile_run_je_id(job_execution_id=str(uuid4()), table_group_id=None)
+    finally:
+        _mcp_project_permissions.reset(token)
+
+
+# ---------------------------------------------------------------------------
+# list_hygiene_issues
+# ---------------------------------------------------------------------------
+
+
+def test_list_hygiene_issues_both_args_rejected(db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    with pytest.raises(MCPUserError, match="Pass either"):
+        list_hygiene_issues(job_execution_id=str(uuid4()), table_group_id=str(uuid4()))
+
+
+def test_list_hygiene_issues_neither_arg_rejected(db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    with pytest.raises(MCPUserError, match="Provide either"):
+        list_hygiene_issues()
+
+
+def test_list_hygiene_issues_invalid_je_uuid(db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    with pytest.raises(MCPUserError, match="not a valid UUID"):
+        list_hygiene_issues(job_execution_id="not-a-uuid")
+
+
+@patch("testgen.mcp.tools.hygiene_issues.TableGroup")
+@patch.object(ProfilingRun, "get_by_id_or_job")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_resolves_via_je_id(mock_list, mock_get, mock_tg_cls, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    run = _mock_run()
+    mock_get.return_value = run
+    mock_tg_cls.get.return_value = _mock_table_group()
+    mock_list.return_value = ([], 0)
+
+    list_hygiene_issues(job_execution_id=str(uuid4()))
+
+    assert mock_list.call_args.args[0] == run.job_execution_id
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_resolves_via_table_group(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    expected_je = uuid4()
+    mock_latest.return_value = expected_je
+    mock_list.return_value = ([], 0)
+
+    list_hygiene_issues(table_group_id=str(uuid4()))
+
+    assert mock_list.call_args.args[0] == expected_je
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_happy_path_renders_fields(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    issue_id = uuid4()
+    mock_list.return_value = ([_list_row(id=issue_id, detail="Dummy Values: 5")], 1)
+
+    result = list_hygiene_issues(table_group_id=str(uuid4()))
+
+    assert "[Definite]" in result
+    assert "Non-Standard Blank Values" in result
+    assert "`frame_size` in `orders`" in result
+    assert str(issue_id) in result
+    assert "Impact Dimension" in result
+    assert "Quality Dimension" in result
+    assert "Disposition" in result
+    assert "Confirmed" in result
+    assert "Dummy Values: 5" in result
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_table_level_heading(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    mock_list.return_value = ([_list_row(column_name=None)], 1)
+
+    result = list_hygiene_issues(table_group_id=str(uuid4()))
+    assert "on `orders`" in result
+    assert "` in `" not in result
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_priority_unknown_fallback(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    mock_list.return_value = ([_list_row(priority=None)], 1)
+
+    assert "[Unknown]" in list_hygiene_issues(table_group_id=str(uuid4()))
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_empty(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    mock_list.return_value = ([], 0)
+
+    result = list_hygiene_issues(table_group_id=str(uuid4()))
+    assert "_No hygiene issues match the supplied filters._" in result
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_pagination_footer(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    mock_list.return_value = ([_list_row(), _list_row()], 100)
+
+    result = list_hygiene_issues(table_group_id=str(uuid4()), limit=2, page=1)
+    assert "Showing 1–2 of 100" in result  # noqa: RUF001 — page-info formatter uses EN DASH
+    assert "page=2" in result
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_disposition_default_coalesces_to_confirmed(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    mock_list.return_value = ([], 0)
+
+    list_hygiene_issues(table_group_id=str(uuid4()))
+
+    sql = _compiled_clauses(mock_list.call_args)
+    assert "coalesce(profile_anomaly_results.disposition" in sql
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+def test_list_hygiene_issues_invalid_disposition(mock_latest, mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+
+    with pytest.raises(MCPUserError, match="Invalid disposition"):
+        list_hygiene_issues(table_group_id=str(uuid4()), disposition="bogus")
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+def test_list_hygiene_issues_invalid_quality_dimension(mock_latest, mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+
+    with pytest.raises(MCPUserError, match="Invalid quality_dimension"):
+        list_hygiene_issues(table_group_id=str(uuid4()), quality_dimension="X")
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+def test_list_hygiene_issues_invalid_impact_dimension(mock_latest, mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+
+    with pytest.raises(MCPUserError, match="Invalid impact_dimension"):
+        list_hygiene_issues(table_group_id=str(uuid4()), impact_dimension="X")
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+def test_list_hygiene_issues_invalid_likelihood(mock_latest, mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+
+    with pytest.raises(MCPUserError, match="Invalid issue_likelihood"):
+        list_hygiene_issues(table_group_id=str(uuid4()), issue_likelihood=["Bogus"])
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+def test_list_hygiene_issues_invalid_pii_risk(mock_latest, mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+
+    with pytest.raises(MCPUserError, match="Invalid pii_risk"):
+        list_hygiene_issues(table_group_id=str(uuid4()), pii_risk=["Low"])
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_passes_project_codes_clause(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    mock_list.return_value = ([], 0)
+
+    list_hygiene_issues(table_group_id=str(uuid4()))
+
+    sql = _compiled_clauses(mock_list.call_args)
+    assert "profile_anomaly_results.project_code IN" in sql
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+def test_list_hygiene_issues_invalid_page(mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+
+    with pytest.raises(MCPUserError, match="Invalid page"):
+        list_hygiene_issues(table_group_id=str(uuid4()), page=0)
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+def test_list_hygiene_issues_limit_above_max(mock_resolve_tg, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+
+    with pytest.raises(MCPUserError, match="between 1 and 200"):
+        list_hygiene_issues(table_group_id=str(uuid4()), limit=201)
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_pii_redacted_when_no_view_pii(
+    mock_list, mock_latest, mock_resolve_tg, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    row = _list_row(detail_redactable=True, pii_flag="B/NAME/Individual", detail="ssn=123")
+    mock_list.return_value = ([row], 1)
+
+    # Default mcp_user has role_a; TEST_PERM_MATRIX has no "view_pii" entry → redaction applies.
+    result = list_hygiene_issues(table_group_id=str(uuid4()))
+    assert PII_REDACTED in result
+    assert "ssn=123" not in result
+
+
+@patch("testgen.mcp.permissions._compute_project_permissions")
+@patch("testgen.mcp.tools.hygiene_issues.resolve_table_group")
+@patch.object(ProfilingRun, "get_latest_complete_je_id_for_table_group")
+@patch.object(HygieneIssue, "list_for_run")
+def test_list_hygiene_issues_pii_visible_when_view_pii_granted(
+    mock_list, mock_latest, mock_resolve_tg, mock_compute, db_session_mock,
+):
+    from testgen.mcp.tools.hygiene_issues import list_hygiene_issues
+
+    perms = MagicMock(spec=ProjectPermissions)
+    perms.allowed_codes = ["demo"]
+    perms.codes_allowed_to.return_value = ["demo"]
+    perms.has_access.side_effect = lambda code: code == "demo"
+    mock_compute.return_value = perms
+
+    mock_resolve_tg.return_value = _mock_table_group()
+    mock_latest.return_value = uuid4()
+    row = _list_row(detail_redactable=True, pii_flag="B/NAME/Individual", detail="ssn=123")
+    mock_list.return_value = ([row], 1)
+
+    result = list_hygiene_issues(table_group_id=str(uuid4()))
+    assert "ssn=123" in result
+    assert PII_REDACTED not in result
+
+
+# ---------------------------------------------------------------------------
+# get_hygiene_issue
+# ---------------------------------------------------------------------------
+
+
+def test_get_hygiene_issue_invalid_uuid(db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import get_hygiene_issue
+
+    with pytest.raises(MCPUserError, match="not a valid UUID"):
+        get_hygiene_issue(issue_id="bogus")
+
+
+@patch.object(HygieneIssue, "get_with_context")
+def test_get_hygiene_issue_not_found_collapses_to_not_accessible(mock_get, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import get_hygiene_issue
+
+    mock_get.return_value = None
+
+    with pytest.raises(MCPResourceNotAccessible):
+        get_hygiene_issue(issue_id=str(uuid4()))
+
+
+@patch.object(HygieneIssue, "get_with_context")
+def test_get_hygiene_issue_passes_project_codes_clause(mock_get, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import get_hygiene_issue
+
+    mock_get.return_value = _detail_row()
+
+    get_hygiene_issue(issue_id=str(uuid4()))
+
+    sql = _compiled_clauses(mock_get.call_args)
+    assert "profile_anomaly_results.project_code IN" in sql
+
+
+@patch.object(HygieneIssue, "get_with_context")
+def test_get_hygiene_issue_renders_full_detail(mock_get, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import get_hygiene_issue
+
+    mock_get.return_value = _detail_row()
+
+    result = get_hygiene_issue(issue_id=str(uuid4()))
+
+    assert "[Definite]" in result
+    assert "Issue ID" in result
+    assert "Schema" in result
+    assert "Table" in result
+    assert "Column" in result
+    assert "Impact Dimension" in result
+    assert "Quality Dimension" in result
+    assert "Disposition" in result
+    assert "## Suggested Action" in result
+    assert "Suggested action body." in result
+    assert "## Issue Type Description" in result
+    assert "Description body." in result
+    assert "## Column Profile" in result
+    assert "## Profiling Run" in result
+
+
+@patch.object(HygieneIssue, "get_with_context")
+def test_get_hygiene_issue_table_level_omits_column(mock_get, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import get_hygiene_issue
+
+    mock_get.return_value = _detail_row(column_name=None)
+    result = get_hygiene_issue(issue_id=str(uuid4()))
+
+    assert "on `orders`" in result
+    assert "` in `" not in result
+
+
+@patch.object(HygieneIssue, "get_with_context")
+def test_get_hygiene_issue_omits_column_profile_when_no_data(mock_get, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import get_hygiene_issue
+
+    mock_get.return_value = _detail_row(
+        column_general_type=None, column_db_data_type=None,
+        column_record_ct=None, column_null_value_ct=None, column_distinct_value_ct=None,
+    )
+    result = get_hygiene_issue(issue_id=str(uuid4()))
+    assert "## Column Profile" not in result
+
+
+@patch.object(HygieneIssue, "get_with_context")
+def test_get_hygiene_issue_renders_null_rate(mock_get, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import get_hygiene_issue
+
+    mock_get.return_value = _detail_row(column_record_ct=1000, column_null_value_ct=50)
+    result = get_hygiene_issue(issue_id=str(uuid4()))
+    assert "Null Rate" in result
+    assert "5.00%" in result
+
+
+@patch.object(HygieneIssue, "get_with_context")
+def test_get_hygiene_issue_skips_null_rate_when_record_ct_zero(mock_get, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import get_hygiene_issue
+
+    mock_get.return_value = _detail_row(column_record_ct=0, column_null_value_ct=0)
+    result = get_hygiene_issue(issue_id=str(uuid4()))
+    assert "Null Rate" not in result
+
+
+# ---------------------------------------------------------------------------
+# update_hygiene_issue
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def disposition_perms():
+    """Grant 'disposition' permission on demo (the conftest's matrix omits it)."""
+    perms = MagicMock(spec=ProjectPermissions)
+    perms.memberships = {"demo": "role_a"}
+    perms.permission = "disposition"
+    perms.username = "test_user"
+    perms.allowed_codes = ["demo"]
+    perms.codes_allowed_to.return_value = ["demo"]
+    perms.has_access.side_effect = lambda code: code in ["demo"]
+
+    with patch("testgen.mcp.permissions._compute_project_permissions", return_value=perms):
+        yield perms
+
+
+def test_update_hygiene_issue_invalid_uuid(db_session_mock, disposition_perms):
+    from testgen.mcp.tools.hygiene_issues import update_hygiene_issue
+
+    with pytest.raises(MCPUserError, match="not a valid UUID"):
+        update_hygiene_issue(issue_id="bogus", disposition="Confirmed")
+
+
+def test_update_hygiene_issue_invalid_disposition(db_session_mock, disposition_perms):
+    from testgen.mcp.tools.hygiene_issues import update_hygiene_issue
+
+    with pytest.raises(MCPUserError, match="Invalid disposition"):
+        update_hygiene_issue(issue_id=str(uuid4()), disposition="Bogus")
+
+
+@patch.object(HygieneIssue, "update_disposition")
+def test_update_hygiene_issue_muted_maps_to_inactive(mock_update, db_session_mock, disposition_perms):
+    from testgen.mcp.tools.hygiene_issues import update_hygiene_issue
+
+    mock_update.return_value = True
+    update_hygiene_issue(issue_id=str(uuid4()), disposition="Muted")
+
+    args = mock_update.call_args.args
+    assert args[1] == Disposition.INACTIVE
+
+
+@patch.object(HygieneIssue, "update_disposition")
+def test_update_hygiene_issue_returns_success_markdown(mock_update, db_session_mock, disposition_perms):
+    from testgen.mcp.tools.hygiene_issues import update_hygiene_issue
+
+    mock_update.return_value = True
+    issue_id = str(uuid4())
+    result = update_hygiene_issue(issue_id=issue_id, disposition="Dismissed")
+
+    assert "Updated hygiene issue" in result
+    assert issue_id in result
+    assert "Dismissed" in result
+
+
+@patch.object(HygieneIssue, "update_disposition")
+def test_update_hygiene_issue_not_updated_collapses_to_not_accessible(
+    mock_update, db_session_mock, disposition_perms,
+):
+    from testgen.mcp.tools.hygiene_issues import update_hygiene_issue
+
+    mock_update.return_value = False
+    with pytest.raises(MCPResourceNotAccessible):
+        update_hygiene_issue(issue_id=str(uuid4()), disposition="Confirmed")
+
+
+@patch.object(HygieneIssue, "update_disposition")
+def test_update_hygiene_issue_passes_project_scope_clause(
+    mock_update, db_session_mock, disposition_perms,
+):
+    from testgen.mcp.tools.hygiene_issues import update_hygiene_issue
+
+    mock_update.return_value = True
+    update_hygiene_issue(issue_id=str(uuid4()), disposition="Confirmed")
+
+    # Trailing args after (issue_uuid, db_disposition) are the *clauses
+    clauses = mock_update.call_args.args[2:]
+    sql = "\n".join(str(c.compile(dialect=postgresql.dialect())) for c in clauses)
+    assert "profile_anomaly_results.project_code IN" in sql
+
+
+def test_update_hygiene_issue_uses_disposition_permission():
+    """Pin the permission name. Silent downgrade to 'view' would be a write-side leak."""
+    import testgen.mcp.tools.hygiene_issues as mod
+
+    closure = {c.cell_contents for c in mod.update_hygiene_issue.__wrapped__.__closure__}
+    assert "disposition" in closure
+
+
+# ---------------------------------------------------------------------------
+# search_hygiene_issues
+# ---------------------------------------------------------------------------
+
+
+@patch.object(HygieneIssue, "search")
+def test_search_no_args_uses_allowed_codes(mock_search, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    mock_search.return_value = ([], 0)
+    search_hygiene_issues()
+
+    sql = _compiled_clauses(mock_search.call_args)
+    assert "profile_anomaly_results.project_code IN" in sql
+
+
+@patch("testgen.mcp.permissions._compute_project_permissions")
+def test_search_inaccessible_project_rejected(mock_compute, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    perms = MagicMock(spec=ProjectPermissions)
+    perms.memberships = {"demo": "role_a"}
+    perms.permission = "view"
+    perms.username = "test_user"
+    perms.allowed_codes = ["demo"]
+    perms.has_access.side_effect = lambda c: c == "demo"
+    perms.verify_access.side_effect = lambda _code, not_found: (_ for _ in ()).throw(not_found)
+    mock_compute.return_value = perms
+
+    with pytest.raises(MCPResourceNotAccessible):
+        search_hygiene_issues(project_code="forbidden")
+
+
+def test_search_invalid_tg_uuid(db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    with pytest.raises(MCPUserError, match="not a valid UUID"):
+        search_hygiene_issues(table_group_id="bogus")
+
+
+@patch.object(HygieneIssue, "search")
+def test_search_table_group_filter_in_clause(mock_search, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    mock_search.return_value = ([], 0)
+    search_hygiene_issues(table_group_id=str(uuid4()))
+
+    sql = _compiled_clauses(mock_search.call_args)
+    assert "profile_anomaly_results.table_groups_id =" in sql
+
+
+@patch.object(HygieneIssue, "search")
+def test_search_since_translates_to_started_at_clause(mock_search, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    mock_search.return_value = ([], 0)
+    search_hygiene_issues(since="7 days")
+
+    sql = _compiled_clauses(mock_search.call_args)
+    assert "job_executions.started_at >=" in sql
+    assert "profiling_runs.profiling_starttime" not in sql
+
+
+def test_search_invalid_since_rejected(db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    with pytest.raises(MCPUserError):
+        search_hygiene_issues(since="not a date")
+
+
+@patch.object(HygieneIssue, "search")
+def test_search_happy_path_renders_search_row_fields(mock_search, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    je_id = uuid4()
+    mock_search.return_value = ([_search_row(table_groups_name="dim-tables", job_execution_id=je_id)], 1)
+
+    result = search_hygiene_issues()
+
+    assert "Hygiene Issue Search" in result
+    assert "Table Group" in result
+    assert "dim-tables" in result
+    assert "Profiling Run" in result
+    assert str(je_id) in result
+    assert "Run Date" in result
+
+
+@patch.object(HygieneIssue, "search")
+def test_search_empty_returns_no_match_message(mock_search, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    mock_search.return_value = ([], 0)
+    result = search_hygiene_issues()
+    assert "_No hygiene issues match the supplied filters._" in result
+
+
+def test_search_invalid_page(db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    with pytest.raises(MCPUserError, match="Invalid page"):
+        search_hygiene_issues(page=0)
+
+
+def test_search_invalid_limit(db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    with pytest.raises(MCPUserError, match="between 1 and 200"):
+        search_hygiene_issues(limit=201)
+
+
+@patch("testgen.mcp.tools.hygiene_issues.resolve_issue_type")
+@patch.object(HygieneIssue, "search")
+def test_search_issue_type_filter_resolved(mock_search, mock_resolve, db_session_mock):
+    from testgen.mcp.tools.hygiene_issues import search_hygiene_issues
+
+    mock_resolve.return_value = "1015"
+    mock_search.return_value = ([], 0)
+    search_hygiene_issues(issue_type="Personally Identifiable Information")
+
+    sql = _compiled_clauses(mock_search.call_args)
+    assert "profile_anomaly_results.anomaly_id =" in sql
diff --git a/tests/unit/mcp/test_tools_reference.py b/tests/unit/mcp/test_tools_reference.py
index 519e6e61..55f6b508 100644
--- a/tests/unit/mcp/test_tools_reference.py
+++ b/tests/unit/mcp/test_tools_reference.py
@@ -91,7 +91,88 @@ def test_glossary_resource():
 
     assert "Entity Hierarchy" in result
     assert "Result Statuses" in result
-    assert "Data Quality Dimensions" in result
+    assert "Quality Dimensions" in result
     assert "Test Scopes" in result
     assert "Disposition" in result
     assert "Monitor Types" not in result
+
+    # Hygiene-issue additions (TG-1029):
+    assert "Profiling Run" in result
+    assert "Hygiene Issue" in result
+    assert "## Hygiene Issue Likelihood" in result
+    assert "Definite" in result
+    assert "Likely" in result
+    assert "Possible" in result
+    assert "PII Risk" in result
+    # All three disposition values defined under one section:
+    assert "Confirmed" in result
+    assert "Dismissed" in result
+    assert "Muted" in result
+    # Recency was added during the migration:
+    assert "Recency" in result
+    # Impact Dimensions section + all four values:
+    assert "## Impact Dimensions" in result
+    assert "Reliability" in result
+    assert "Conformance" in result
+    assert "Regularity" in result
+    assert "Usability" in result
+
+
+@patch("testgen.mcp.tools.reference.HygieneIssueType")
+def test_hygiene_issue_types_resource_basic(mock_type_cls, db_session_mock):
+    t1 = MagicMock()
+    t1.name = "Personally Identifiable Information"
+    t1.impact_dimension = "Conformance"
+    t1.dq_dimension = "Validity"
+    t1.likelihood = "Potential PII"
+    t1.description = "PII description."
+    t1.suggested_action = "Handle PII carefully."
+    t2 = MagicMock()
+    t2.name = "Non-Standard Blank Values"
+    t2.impact_dimension = "Usability"
+    t2.dq_dimension = "Completeness"
+    t2.likelihood = "Definite"
+    t2.description = "Blanks description."
+    t2.suggested_action = "Cleanse blanks."
+    mock_type_cls.select_where.return_value = [t1, t2]
+
+    from testgen.mcp.tools.reference import hygiene_issue_types_resource
+
+    result = hygiene_issue_types_resource()
+
+    # Header order: Issue Type | Impact Dimension | Quality Dimension | Likelihood | Description | Suggested Action
+    header_line = next(line for line in result.split("\n") if line.startswith("| Issue Type"))
+    assert header_line == "| Issue Type | Impact Dimension | Quality Dimension | Likelihood | Description | Suggested Action |"
+    # All values surface:
+    assert "Personally Identifiable Information" in result
+    assert "Non-Standard Blank Values" in result
+    assert "Potential PII" in result
+    assert "Definite" in result
+    assert "Handle PII carefully." in result
+
+
+@patch("testgen.mcp.tools.reference.HygieneIssueType")
+def test_hygiene_issue_types_resource_orders_by_name(mock_type_cls, db_session_mock):
+    from testgen.common.models.hygiene_issue import HygieneIssueType
+    from testgen.mcp.tools.reference import hygiene_issue_types_resource
+
+    mock_type_cls.select_where.return_value = []
+    mock_type_cls.name = HygieneIssueType.name
+
+    hygiene_issue_types_resource()
+
+    # ``select_where`` was called with order_by tuple containing the name column.
+    mock_type_cls.select_where.assert_called_once()
+    kwargs = mock_type_cls.select_where.call_args.kwargs
+    assert "order_by" in kwargs
+    assert kwargs["order_by"][0] is HygieneIssueType.name
+
+
+@patch("testgen.mcp.tools.reference.HygieneIssueType")
+def test_hygiene_issue_types_resource_empty(mock_type_cls, db_session_mock):
+    mock_type_cls.select_where.return_value = []
+
+    from testgen.mcp.tools.reference import hygiene_issue_types_resource
+
+    result = hygiene_issue_types_resource()
+    assert "No hygiene issue types found" in result

From fde7321abc5ac5c7d9488859f3bbc85aff5df430 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Wed, 6 May 2026 01:41:07 -0400
Subject: [PATCH 08/11] fix(standalone): graceful shutdown of embedded postgres
 on signal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`testgen run-app` was leaving orphan postgres processes after Ctrl+C in
standalone (pip) mode, breaking `tg delete` (data dir locked).

pixeltable-pgserver tracks handles via a disk-backed PID registry at
`<pgdata>/.handle_pids.json` and only stops postgres when the calling PID
is the last one registered. Two paths were leaking PIDs into the registry:

1. Each `run-app all` child re-entered `cli()` and called `start_server()`
   itself, registering its own PID alongside the parent's.
2. On Windows, `_forward_signal_to_child` used `terminate()`
   (TerminateProcess), so children's atexit never ran — their PIDs
   stayed in the registry forever, and the parent's cleanup silently
   no-op'd.

Fix:

- `cli()` now calls `ensure_standalone_setup(uri)` instead of
  `start_server()` when `_TG_STANDALONE_URI` is inherited, so children
  attach to the parent's pgserver without registering a PID.
- `run_app("all")` injects the parent's pgserver URI into `child_env`.
- `run_ui()` Streamlit env-var assembly falls back to the inherited env
  var when this process doesn't own pgserver.
- New `_subprocess_spawn_kwargs()` helper: POSIX `start_new_session`,
  Windows `CREATE_NEW_PROCESS_GROUP`. Applied at both `Popen` sites.
- Windows branch of `_forward_signal_to_child` now sends
  `CTRL_BREAK_EVENT` instead of `terminate()`, so children's atexit and
  SIGBREAK handlers run.
- New `_install_shutdown_handler()` registers SIGINT/SIGTERM/(SIGBREAK)
  in one call. Used in `run_ui`, `run_app("all")`, and `Scheduler.run`.
- `run_app("all")` children-watcher loop now iterates a snapshot of
  `children` so simultaneous child exits don't leave a sibling un-reaped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 testgen/__main__.py                | 78 ++++++++++++++++++++++++------
 testgen/scheduler/cli_scheduler.py |  4 ++
 2 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/testgen/__main__.py b/testgen/__main__.py
index 1841636a..3b8d7f12 100644
--- a/testgen/__main__.py
+++ b/testgen/__main__.py
@@ -97,6 +97,7 @@ def _capped(level):
 from testgen.common.notifications.base import smtp_configured
 from testgen.common.standalone_postgres import (
     STANDALONE_URI_ENV_VAR,
+    ensure_standalone_setup,
     get_server_uri,
     is_standalone_mode,
 )
@@ -116,12 +117,41 @@ def _capped(level):
 CHILDREN_POLL_INTERVAL = 10
 
 
+def _subprocess_spawn_kwargs() -> dict:
+    """Spawn flags that let the parent send a *catchable* shutdown signal to a child.
+
+    On POSIX, ``start_new_session`` puts the child in its own session so the
+    console SIGINT only hits the parent. On Windows, ``CREATE_NEW_PROCESS_GROUP``
+    is required so the parent can deliver ``CTRL_BREAK_EVENT`` to the child —
+    without it, the only options are ``terminate()`` (uncatchable) or relying on
+    the console-wide Ctrl+C broadcast (racy).
+    """
+    if sys.platform == "win32":
+        return {"creationflags": subprocess.CREATE_NEW_PROCESS_GROUP}
+    return {"start_new_session": True}
+
+
+def _install_shutdown_handler(handler) -> None:
+    """Register *handler* for SIGINT, SIGTERM, and (on Windows) SIGBREAK.
+
+    Windows children spawned with ``CREATE_NEW_PROCESS_GROUP`` receive
+    ``CTRL_BREAK_EVENT`` (delivered as SIGBREAK) instead of console Ctrl+C —
+    treat it the same as SIGTERM so atexit hooks run and pgserver's PID list
+    stays clean.
+    """
+    signal.signal(signal.SIGINT, handler)
+    signal.signal(signal.SIGTERM, handler)
+    if sigbreak := getattr(signal, "SIGBREAK", None):
+        signal.signal(sigbreak, handler)
+
+
 def _forward_signal_to_child(child: subprocess.Popen, signum: int) -> None:
-    # On POSIX, forward the signal verbatim. On Windows, subprocess.send_signal
-    # rejects everything except SIGTERM / CTRL_C_EVENT / CTRL_BREAK_EVENT, so
-    # fall back to terminate() — equivalent to TerminateProcess().
+    # POSIX: forward the signal verbatim. Windows: send CTRL_BREAK_EVENT so the
+    # child's atexit hooks run (it deregisters from pgserver's PID list, lets
+    # streamlit/uvicorn shut down their event loops, etc.). The child must have
+    # been spawned with CREATE_NEW_PROCESS_GROUP — see _subprocess_spawn_kwargs.
     if sys.platform == "win32":
-        child.terminate()
+        child.send_signal(signal.CTRL_BREAK_EVENT)
     else:
         child.send_signal(signum)
 
@@ -169,7 +199,14 @@ def format_epilog(self, _ctx: Context, formatter: click.HelpFormatter) -> None:
 @click.pass_context
 def cli(ctx: Context, verbose: bool):
     if is_standalone_mode():
-        start_standalone_postgres()
+        # Children spawned by `run-app all` (and the Streamlit subprocess) inherit the
+        # parent's pgserver URI. They must NOT call get_server() themselves — each call
+        # registers the calling PID in pgserver's on-disk handle list, and any PID left
+        # over after a hard kill prevents the parent from actually stopping postgres.
+        if inherited_uri := os.environ.get(STANDALONE_URI_ENV_VAR):
+            ensure_standalone_setup(inherited_uri)
+        else:
+            start_standalone_postgres()
 
     if verbose:
         configure_logging(level=logging.DEBUG)
@@ -924,14 +961,16 @@ def init_ui():
     app_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ui/app.py")
 
     # In standalone mode, pass the pgserver URI to the Streamlit subprocess
-    # so it can connect without acquiring the pgserver file lock.
+    # so it can connect without acquiring the pgserver file lock. When this
+    # process is itself a child of `run-app all`, the URI is already in the
+    # inherited environment — fall back to that so Streamlit still gets it.
     child_env = {**os.environ, "TG_JOB_SOURCE": "UI"}
     if is_standalone_mode():
-        server_uri = get_server_uri()
+        server_uri = get_server_uri() or os.environ.get(STANDALONE_URI_ENV_VAR)
         if server_uri:
-            child_env = {**os.environ, "TG_JOB_SOURCE": "UI", STANDALONE_URI_ENV_VAR: server_uri}
+            child_env[STANDALONE_URI_ENV_VAR] = server_uri
 
-    process= subprocess.Popen(
+    process = subprocess.Popen(
         [
             sys.executable,
             "-m",
@@ -950,12 +989,12 @@ def init_ui():
             f"{'--debug' if settings.IS_DEBUG else ''}",
         ],
         env=child_env,
+        **_subprocess_spawn_kwargs(),
     )
     def term_ui(signum, _):
         LOG.info(f"Sending termination signal {signum} to Testgen UI")
         _forward_signal_to_child(process, signum)
-    signal.signal(signal.SIGINT, term_ui)
-    signal.signal(signal.SIGTERM, term_ui)
+    _install_shutdown_handler(term_ui)
     status_code = process.wait()
     LOG.log(logging.ERROR if status_code != 0 else logging.INFO, f"Testgen UI exited with status code {status_code}")
 
@@ -980,8 +1019,15 @@ def run_app(module):
             run_server()
 
         case "all":
+            child_env = os.environ.copy()
+            if is_standalone_mode() and (server_uri := get_server_uri()):
+                child_env[STANDALONE_URI_ENV_VAR] = server_uri
             children = [
-                subprocess.Popen([sys.executable, "-m", "testgen", "run-app", m], start_new_session=True)
+                subprocess.Popen(
+                    [sys.executable, "-m", "testgen", "run-app", m],
+                    env=child_env,
+                    **_subprocess_spawn_kwargs(),
+                )
                 for m in APP_MODULES
             ]
 
@@ -989,8 +1035,7 @@ def term_children(signum, _):
                 for child in children:
                     _forward_signal_to_child(child, signum)
 
-            signal.signal(signal.SIGINT, term_children)
-            signal.signal(signal.SIGTERM, term_children)
+            _install_shutdown_handler(term_children)
 
             terminating = False
             while children:
@@ -999,7 +1044,10 @@ def term_children(signum, _):
                 except subprocess.TimeoutExpired:
                     pass
 
-                for child in children:
+                # Iterate a snapshot — `children.remove(child)` inside a live iteration
+                # would skip the next element and could leave a sibling un-reaped during
+                # the simultaneous-exit case we're explicitly hardening for here.
+                for child in list(children):
                     if child.poll() is not None:
                         children.remove(child)
                         if not terminating:
diff --git a/testgen/scheduler/cli_scheduler.py b/testgen/scheduler/cli_scheduler.py
index bd2719ec..ddc3c9ec 100644
--- a/testgen/scheduler/cli_scheduler.py
+++ b/testgen/scheduler/cli_scheduler.py
@@ -195,6 +195,10 @@ def sig_handler(signum, _):
 
         signal.signal(signal.SIGINT, sig_handler)
         signal.signal(signal.SIGTERM, sig_handler)
+        # Windows: parent forwards CTRL_BREAK_EVENT (delivered as SIGBREAK) when the
+        # child is spawned in its own process group, so handle it the same way.
+        if sigbreak := getattr(signal, "SIGBREAK", None):
+            signal.signal(sigbreak, sig_handler)
 
         try:
             self.start(datetime.now(UTC))

From 043da3c8c658915b206a2726a484d1a55f95a407 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Wed, 6 May 2026 02:26:54 -0400
Subject: [PATCH 09/11] docs: simplify README install section to point at docs
 site
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dk-installer now offers Docker and pip install modes, both fully
documented at docs.datakitchen.io. Replace the long install instructions
in the README — which had drifted out of sync with the installer and
the docs — with a brief pointer to the install pages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md | 195 ++----------------------------------------------------
 1 file changed, 4 insertions(+), 191 deletions(-)

diff --git a/README.md b/README.md
index 5383516d..b3be60ad 100644
--- a/README.md
+++ b/README.md
@@ -29,199 +29,12 @@ A <b>single place to manage Data Quality</b> across data sets, locations, and te
 <img alt="DataKitchen Open Source Data Quality TestGen Features - Single Place" src="https://datakitchen.io/wp-content/uploads/2024/07/Screenshot-dataops-testgen-centralize.png" width="70%">
 </p>
 
-## Installation with dk-installer (recommended)
+## Installation
 
-The [dk-installer](https://github.com/DataKitchen/data-observability-installer/?tab=readme-ov-file#install-the-testgen-application) program installs DataOps Data Quality TestGen as a [Docker Compose](https://docs.docker.com/compose/) application. This is the recommended mode of installation as Docker encapsulates and isolates the application from other software on your machine and does not require you to manage Python dependencies.
+The [dk-installer](https://github.com/DataKitchen/data-observability-installer/?tab=readme-ov-file#install-the-testgen-application) program installs TestGen in either Docker or pip mode. For complete instructions, see the documentation:
 
-### Install the prerequisite software
-
-| Software                | Tested Versions               | Command to check version                |
-|-------------------------|-------------------------|-------------------------------|
-| [Python](https://www.python.org/downloads/) <br/>- Most Linux and macOS systems have Python pre-installed. <br/>- On Windows machines, you will need to download and install it.  <br/> Why Python?  To run the installer.       | 3.9, 3.10, 3.11, 3.12                | `python3 --version`                |
-| [Docker](https://docs.docker.com/get-docker/) <br/>[Docker Compose](https://docs.docker.com/compose/install/)  <br/> Why Docker?  Docker lets you try TestGen without affecting your local software environment.  All the dependencies TestGen needs are isolated in its own container, so installation is easy and insulated.  | 26.1, 27.5, 28.0 <br/> 2.32, 2.33, 2.34        | `docker -v` <br/> `docker compose version`         |
-
-### Download the installer
-
-On Unix-based operating systems, use the following command to download it to the current directory. We recommend creating a new, empty directory.
-
-```shell
-curl -o dk-installer.py 'https://raw.githubusercontent.com/DataKitchen/data-observability-installer/main/dk-installer.py'
-```
-
-* Alternatively, you can manually download the [`dk-installer.py`](https://github.com/DataKitchen/data-observability-installer/blob/main/dk-installer.py) file from the [data-observability-installer](https://github.com/DataKitchen/data-observability-installer) repository.
-* All commands listed below should be run from the folder containing this file.
-* For usage help and command options, run `python3 dk-installer.py --help` or `python3 dk-installer.py <command> --help`.
-
-### Install the TestGen application
-
-The installation downloads the latest Docker images for TestGen and deploys a new Docker Compose application. The process may take 5~10 minutes depending on your machine and network connection.
-
-```shell
-python3 dk-installer.py tg install
-```
-
-The `--port` option may be used to set a custom localhost port for the application (default: 8501).
-
-To enable SSL for HTTPS support, use the `--ssl-cert-file` and `--ssl-key-file` options to specify local file paths to your SSL certificate and key files.
-
-Once the installation completes, verify that you can login to the UI with the URL and credentials provided in the output.
-
-### Optional: Run the TestGen demo setup
-
-The [Data Observability quickstart](https://docs.datakitchen.io/tutorials/quickstart-demo/) walks you through DataOps Data Quality TestGen capabilities to demonstrate how it covers critical use cases for data and analytic teams.
-
-```shell
-python3 dk-installer.py tg run-demo
-```
-
-In the TestGen UI, you will see that new data profiling and test results have been generated.
-
-## Installation with pip
-
-As an alternative to the Docker Compose [installation with dk-installer (recommended)](#installation-with-dk-installer-recommended), DataOps Data Quality TestGen can also be installed as a Python package via [pip](https://pip.pypa.io/en/stable/). This mode of installation uses the [dataops-testgen](https://pypi.org/project/dataops-testgen/) package published to PyPI, and it requires a PostgreSQL instance to be provisioned for the application database.
-
-### Install the prerequisite software
-
-| Software                                                                                                                                                                         | Tested Versions  | Command to check version               |
-|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|------------------------------|
-| [Python](https://www.python.org/downloads/) <br/>- Most Linux and macOS systems have Python pre-installed. <br/>- On Windows machines, you will need to download and install it. | 3.11, 3.12, 3.13 | `python3 --version`               |
-| [PostgreSQL](https://www.postgresql.org/download/)                                                                                                                                                                     | 14.1, 15.8, 16.4       | `psql --version`|
-
-### Install the TestGen package
-
-We recommend using a Python virtual environment to avoid any dependency conflicts with other applications installed on your machine. The [venv](https://docs.python.org/3/library/venv.html#creating-virtual-environments) module, which is part of the Python standard library, or other third-party tools, like [virtualenv](https://virtualenv.pypa.io/en/latest/) or [conda](https://docs.conda.io/en/latest/), can be used.
-
-Create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.11`). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference.
-
-_On Linux/Mac_
-```shell
-python3 -m venv venv
-source venv/bin/activate
-```
-
-_On Windows_
-```powershell
-py -3.13 -m venv venv
-venv\Scripts\activate
-```
-
-Within the virtual environment, install the TestGen package with pip.
-```shell
-pip install dataops-testgen
-```
-
-Verify that the [_testgen_ command line](https://docs.datakitchen.io/testgen/cli-reference/) works.
-```shell
-testgen --help
-```
-
-### Set up the application database in PostgresSQL
-
-Create a `local.env` file with the following environment variables, replacing the `<value>` placeholders with appropriate values. Refer to the [TestGen Configuration](docs/configuration.md) document for more details, defaults, and other supported configuration.
-```shell
-# Connection parameters for the PostgreSQL server
-export TG_METADATA_DB_HOST=<postgres_hostname>
-export TG_METADATA_DB_PORT=<postgres_port>
-
-# Connection credentials for the PostgreSQL server
-# This role must have privileges to create roles, users, database and schema so that the application database can be initialized
-export TG_METADATA_DB_USER=<postgres_username>
-export TG_METADATA_DB_PASSWORD=<postgres_password>
-
-# Set a password and arbitrary string (the "salt") to be used for encrypting secrets in the application database
-export TG_DECRYPT_PASSWORD=<encryption_password>
-export TG_DECRYPT_SALT=<encryption_salt>
-
-# Set credentials for the default admin user to be created for TestGen
-export TESTGEN_USERNAME=<username>
-export TESTGEN_PASSWORD=<password>
-
-# Set an arbitrary base64-encoded string to be used for signing authentication tokens
-export TG_JWT_HASHING_KEY=<base64_key>
-
-# Set an accessible path for storing application logs
-export TESTGEN_LOG_FILE_PATH=<path_for_logs>
-```
-
-Source the file to apply the environment variables. For the Windows equivalent, refer to [this guide](https://bennett4.medium.com/windows-alternative-to-source-env-for-setting-environment-variables-606be2a6d3e1).
-```shell
-source local.env
-```
-
-Make sure the PostgreSQL database server is up and running. Initialize the application database for TestGen.
-```shell
-testgen setup-system-db --yes
-```
-
-### Run the application modules
-
-Run the following command to start TestGen. It will open the browser at [http://localhost:8501](http://localhost:8501).
-
-```shell
-testgen run-app
-```
-
-Verify that you can login to the UI with the `TESTGEN_USERNAME` and `TESTGEN_PASSWORD` values that you configured in the environment variables.
-
-### Optional: Run the TestGen demo setup
-
-The [Data Observability quickstart](https://docs.datakitchen.io/tutorials/quickstart-demo/) walks you through DataOps Data Quality TestGen capabilities to demonstrate how it covers critical use cases for data and analytic teams.
-
-```shell
-testgen quick-start
-```
-
-In the TestGen UI, you will see that new data profiling and test results have been generated.
-
-## Useful Commands
-
-The [dk-installer](https://github.com/DataKitchen/data-observability-installer/?tab=readme-ov-file#install-the-testgen-application) and [docker compose CLI](https://docs.docker.com/compose/reference/) can be used to operate the TestGen application installed using dk-installer. All commands must be run in the same folder that contains the `dk-installer.py` and `docker-compose.yml` files used by the installation.
-
-### Remove demo data
-
-After completing the quickstart, you can remove the demo data from the application with the following command.
-
-```shell
-python3 dk-installer.py tg delete-demo
-```
-
-### Upgrade to latest version
-
-New releases of TestGen are announced on the `#releases` channel on [Data Observability Slack](https://data-observability-slack.datakitchen.io/join), and release notes can be found on the [DataKitchen documentation portal](https://docs.datakitchen.io/testgen/release-notes/). Use the following command to upgrade to the latest released version.
-
- ```shell
- python3 dk-installer.py tg upgrade
- ```
-
-### Uninstall the application
-
-The following command uninstalls the Docker Compose application and removes all data, containers, and images related to TestGen from your machine.
-
-```shell
-python3 dk-installer.py tg delete
-```
-
-### Access the _testgen_ CLI
-
-The [_testgen_ command line](https://docs.datakitchen.io/testgen/cli-reference/) can be accessed within the running container.
-
-```shell
-docker compose exec engine bash
-```
-
-Use `exit` to return to the regular terminal.
-
-### Stop the application
-
-```shell
-docker compose down
-```
-
-### Restart the application
-
-```shell
-docker compose up -d
-```
+* [Install on Mac/Linux](https://docs.datakitchen.io/testgen/get-started/install-on-mac-linux/)
+* [Install on Windows](https://docs.datakitchen.io/testgen/get-started/install-on-windows/)
 
 ## What Next?
 

From 4e7cc45c259949139f17097f8c5a5881767f3a64 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Thu, 14 May 2026 22:47:18 -0400
Subject: [PATCH 10/11] fix(standalone): resolve embedded host/port at
 connection-build time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Windows pgserver picks a fresh ephemeral TCP port on every startup, so the
demo-DB connection row written during quick-start became stale as soon as
run-app started a new pgserver session — "Test Connection" and any target-DB
query failed with "connection refused".

Store a <embedded> sentinel in project_host instead of the live host/port;
resolve_connection_params rewrites it to the live values when standalone mode
is active. Single chokepoint covers UI test-connection, profiling, test
execution, and quick-start increments.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 testgen/commands/run_launch_db_config.py         |  9 +++++----
 testgen/commands/run_quick_start.py              |  9 +++++----
 testgen/common/database/flavor/flavor_service.py | 11 +++++++++--
 testgen/common/standalone_postgres.py            |  7 +++++++
 4 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/testgen/commands/run_launch_db_config.py b/testgen/commands/run_launch_db_config.py
index 11b2257f..f7b69bcb 100644
--- a/testgen/commands/run_launch_db_config.py
+++ b/testgen/commands/run_launch_db_config.py
@@ -9,7 +9,7 @@
 from testgen.common.models import with_database_session
 from testgen.common.read_file import get_template_files
 from testgen.common.read_yaml_metadata_records import import_metadata_records_from_yaml
-from testgen.common.standalone_postgres import get_target_host_port, is_standalone_mode
+from testgen.common.standalone_postgres import EMBEDDED_HOST_SENTINEL, is_standalone_mode
 
 LOG = logging.getLogger("testgen")
 
@@ -28,9 +28,10 @@ def _get_params_mapping() -> dict:
     project_user = settings.PROJECT_DATABASE_USER
     project_password = settings.PROJECT_DATABASE_PASSWORD
     if is_standalone_mode():
-        project_host, server_port = get_target_host_port()
-        if server_port:
-            project_port = server_port
+        # Live host/port are resolved at connection-build time via the sentinel
+        # so the row survives pgserver picking a fresh ephemeral port on Windows.
+        project_host = EMBEDDED_HOST_SENTINEL
+        project_port = ""
         project_user = "postgres"
         project_password = ""
 
diff --git a/testgen/commands/run_quick_start.py b/testgen/commands/run_quick_start.py
index e7a9a84d..5f7389bc 100644
--- a/testgen/commands/run_quick_start.py
+++ b/testgen/commands/run_quick_start.py
@@ -27,7 +27,7 @@
 from testgen.common.models.table_group import TableGroup
 from testgen.common.notifications.base import smtp_configured
 from testgen.common.read_file import read_template_sql_file
-from testgen.common.standalone_postgres import get_target_host_port, is_standalone_mode
+from testgen.common.standalone_postgres import EMBEDDED_HOST_SENTINEL, is_standalone_mode
 
 LOG = logging.getLogger("testgen")
 random.seed(42)
@@ -148,9 +148,10 @@ def _get_settings_params_mapping() -> dict:
     admin_user = settings.DATABASE_ADMIN_USER
     admin_password = settings.DATABASE_ADMIN_PASSWORD
     if is_standalone_mode():
-        host, server_port = get_target_host_port()
-        if server_port:
-            port = server_port
+        # Live host/port are resolved at connection-build time via the sentinel
+        # so the row survives pgserver picking a fresh ephemeral port on Windows.
+        host = EMBEDDED_HOST_SENTINEL
+        port = ""
         admin_user = "postgres"
         admin_password = ""
 
diff --git a/testgen/common/database/flavor/flavor_service.py b/testgen/common/database/flavor/flavor_service.py
index a56ac8ba..dafc7d2a 100644
--- a/testgen/common/database/flavor/flavor_service.py
+++ b/testgen/common/database/flavor/flavor_service.py
@@ -61,13 +61,20 @@ def _decrypt_if_needed(value: Any) -> str | None:
 
 def resolve_connection_params(connection_params: ConnectionParams) -> ResolvedConnectionParams:
     sql_flavor = connection_params.get("sql_flavor") or ""
+    host = connection_params.get("project_host") or ""
+    port = connection_params.get("project_port") or ""
+    # Lazy import to keep the flavor layer free of standalone concerns at module load.
+    from testgen.common.standalone_postgres import EMBEDDED_HOST_SENTINEL, get_target_host_port, is_standalone_mode
+    if host == EMBEDDED_HOST_SENTINEL and is_standalone_mode():
+        host, live_port = get_target_host_port()
+        port = live_port or ""
     return ResolvedConnectionParams(
         url=connection_params.get("url") or "",
         connect_by_url=connection_params.get("connect_by_url", False),
         username=connection_params.get("project_user") or "",
         password=_decrypt_if_needed(connection_params.get("project_pw_encrypted")),
-        host=connection_params.get("project_host") or "",
-        port=connection_params.get("project_port") or "",
+        host=host,
+        port=port,
         dbname=connection_params.get("project_db") or "",
         dbschema=connection_params.get("table_group_schema"),
         sql_flavor=sql_flavor,
diff --git a/testgen/common/standalone_postgres.py b/testgen/common/standalone_postgres.py
index d272eb94..be27e774 100644
--- a/testgen/common/standalone_postgres.py
+++ b/testgen/common/standalone_postgres.py
@@ -23,6 +23,13 @@
 HOME_DIR_ENV_VAR = "TG_TESTGEN_HOME"
 STANDALONE_URI_ENV_VAR = "_TG_STANDALONE_URI"
 
+# Stored as ``project_host`` in the demo-DB connection row so that the actual
+# host/port — which can change across sessions on Windows (pgserver picks a
+# fresh ephemeral TCP port each startup) — are resolved at connection-build
+# time by ``resolve_connection_params``. Angle brackets are illegal in real
+# hostnames, so this can't collide with a user-defined connection.
+EMBEDDED_HOST_SENTINEL = "<embedded>"
+
 
 def get_home_dir() -> Path:
     env_dir = os.getenv(HOME_DIR_ENV_VAR)

From c002b5346f31897423b42201d6859dbf82e3ecbb Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Thu, 14 May 2026 23:22:56 -0400
Subject: [PATCH 11/11] release: 5.32.2 -> 5.33.2

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3cae9aed..13924a31 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dataops-testgen"
-version = "5.32.2"
+version = "5.33.2"
 description = "DataKitchen's Data Quality DataOps TestGen"
 authors = [
     { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },