From 075b3f57240c58f752ebae6c8ba3ed0fc0d62efd Mon Sep 17 00:00:00 2001 From: William Hill Date: Tue, 24 Feb 2026 13:08:18 -0500 Subject: [PATCH 01/15] docs: design doc for self-service data upload (issue #86) --- .../2026-02-24-self-service-upload-design.md | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 docs/plans/2026-02-24-self-service-upload-design.md diff --git a/docs/plans/2026-02-24-self-service-upload-design.md b/docs/plans/2026-02-24-self-service-upload-design.md new file mode 100644 index 0000000..9bf831b --- /dev/null +++ b/docs/plans/2026-02-24-self-service-upload-design.md @@ -0,0 +1,170 @@ +# Design: Self-Service Data Upload (Issue #86) + +**Date:** 2026-02-24 +**Author:** Claude Code + +--- + +## Overview + +Allow admin and IR users to upload institutional data files directly from the dashboard without +needing direct database or server access. Two upload paths: course enrollment CSVs (end-to-end +to Postgres) and PDP cohort/AR files (to Supabase Storage + GitHub Actions ML pipeline trigger). + +--- + +## Scope + +**In scope:** +- Course enrollment CSV → `course_enrollments` Postgres table (upsert) +- PDP Cohort CSV / PDP AR (.xlsx) → Supabase Storage + GitHub Actions `repository_dispatch` +- Preview step (first 10 rows + column validation) before commit +- Role guard: admin and ir only + +**Out of scope:** +- Upload history log (future issue) +- Column remapping UI (columns must match known schema) +- ML experiment tracking / MLflow (future issue) +- Auto-triggering ML pipeline without a server (GitHub Actions is the trigger mechanism) + +--- + +## Pages & Routing + +**New page:** `codebenders-dashboard/app/admin/upload/page.tsx` + +**Role guard:** Add to `lib/roles.ts` `ROUTE_PERMISSIONS`: +```ts +{ prefix: "/admin", roles: ["admin", "ir"] }, +{ prefix: "/api/admin", roles: ["admin", "ir"] }, +``` +Middleware already enforces this pattern via `x-user-role` header — no other auth code needed. 
+ +**Nav link:** Add "Upload Data" to `nav-header.tsx`, visible only to admin/ir roles. + +**New API routes:** +- `POST /api/admin/upload/preview` — parse first 10 rows, return sample + validation summary +- `POST /api/admin/upload/commit` — full ingest (course → Postgres; PDP/AR → Storage + Actions) + +--- + +## UI Flow (3 States) + +### State 1 — Select & Drop +- Dropdown: file type (`Course Enrollment CSV` | `PDP Cohort CSV` | `PDP AR File (.xlsx)`) +- Drag-and-drop zone (click to pick; `.csv` for course/cohort, `.csv`+`.xlsx` for AR) +- "Preview" button → calls `/api/admin/upload/preview` + +### State 2 — Preview +- Shows: detected file type, estimated row count, first 10 rows in a table +- Validation banner: lists missing required columns or warnings +- "Confirm & Upload" → calls `/api/admin/upload/commit` +- "Back" link to return to State 1 + +### State 3 — Result +- Course enrollments: `{ inserted, skipped, errors[] }` summary card +- PDP/AR: "File accepted — ML pipeline queued in GitHub Actions" + link to Actions run +- "Upload another file" resets to State 1 + +--- + +## API Routes + +### `POST /api/admin/upload/preview` + +**Input:** `multipart/form-data` with `file` and `fileType` fields + +**Logic:** +1. Parse first 50 rows with `csv-parse` (CSV) or `xlsx` (Excel) +2. Validate required columns exist for the given `fileType` +3. Return `{ columns, sampleRows (first 10), rowCount (estimated), warnings[] }` + +### `POST /api/admin/upload/commit` + +**Input:** Same multipart form + +**Course enrollment path:** +1. Stream-parse full CSV with `csv-parse` async iterator +2. Batch-upsert 500 rows at a time into `course_enrollments` via `pg` +3. Conflict target: `(student_guid, course_prefix, course_number, academic_term)` +4. Return `{ inserted, skipped, errors[] }` + +**PDP/AR path:** +1. Upload file to Supabase Storage bucket `pdp-uploads` via `@supabase/supabase-js` +2. 
Call GitHub API `POST /repos/{owner}/{repo}/dispatches` with: + ```json + { "event_type": "ml-pipeline", "client_payload": { "file_path": "" } } + ``` +3. Return `{ status: "processing", actionsUrl: "https://github.com/{owner}/{repo}/actions" }` + +**Role enforcement:** Read `x-user-role` header (set by middleware); return 403 if not admin/ir. + +--- + +## GitHub Actions Workflow + +**File:** `.github/workflows/ml-pipeline.yml` + +**Trigger:** `repository_dispatch` with `event_type: ml-pipeline` + +**Steps:** +1. Checkout repo +2. Set up Python with `venv` +3. Install dependencies (`pip install -r requirements.txt`) +4. Download uploaded file from Supabase Storage using `SUPABASE_SERVICE_KEY` secret +5. Run `venv/bin/python ai_model/complete_ml_pipeline.py --input ` +6. Upload `ML_PIPELINE_REPORT.txt` as a GitHub Actions artifact (retained 90 days) + +**Required secrets:** `SUPABASE_URL`, `SUPABASE_SERVICE_KEY`, `GITHUB_TOKEN` (auto-provided) + +--- + +## Required Column Schemas + +### Course Enrollment CSV +Must include: `student_guid`, `course_prefix`, `course_number`, `academic_year`, `academic_term` +Optional (all other `course_enrollments` columns): filled as NULL if absent + +### PDP Cohort CSV +Must include: `Institution_ID`, `Cohort`, `Student_GUID`, `Cohort_Term` + +### PDP AR File (.xlsx) +Must include: `Institution_ID`, `Cohort`, `Student_GUID` (first sheet parsed) + +--- + +## New Packages + +| Package | Purpose | +|---------|---------| +| `csv-parse` | Streaming CSV parsing (async iterator mode) | +| `xlsx` | Excel (.xlsx) parsing | + +--- + +## New Files + +| File | Purpose | +|------|---------| +| `codebenders-dashboard/app/admin/upload/page.tsx` | Upload UI page | +| `codebenders-dashboard/app/api/admin/upload/preview/route.ts` | Preview API route | +| `codebenders-dashboard/app/api/admin/upload/commit/route.ts` | Commit API route | +| `.github/workflows/ml-pipeline.yml` | GitHub Actions ML pipeline trigger | + +--- + +## Supabase Changes + 
+**Storage bucket:** Create `pdp-uploads` bucket (private, authenticated access only). +No new database migrations required — `course_enrollments` table already exists. + +**Bucket policy:** Only service role key can read/write. Signed URLs used for pipeline download. + +--- + +## Constraints & Known Limitations + +- ML pipeline trigger via GitHub Actions means a ~30-60s delay before the pipeline starts +- Vercel free tier has a 4.5 MB request body limit — large files should use Supabase Storage direct upload in a future iteration +- No upload history log in this version (deferred) +- Column remapping is out of scope — files must match the known schema From 184202eed8002761efb8416e0a3e16c8b0508733 Mon Sep 17 00:00:00 2001 From: William Hill Date: Tue, 24 Feb 2026 13:12:15 -0500 Subject: [PATCH 02/15] docs: implementation plan for self-service data upload (issue #86) --- docs/plans/2026-02-24-self-service-upload.md | 1135 ++++++++++++++++++ 1 file changed, 1135 insertions(+) create mode 100644 docs/plans/2026-02-24-self-service-upload.md diff --git a/docs/plans/2026-02-24-self-service-upload.md b/docs/plans/2026-02-24-self-service-upload.md new file mode 100644 index 0000000..2c34769 --- /dev/null +++ b/docs/plans/2026-02-24-self-service-upload.md @@ -0,0 +1,1135 @@ +# Self-Service Data Upload Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Add a `/admin/upload` page (admin/ir only) for uploading course enrollment CSVs directly into Postgres, and PDP cohort/AR files into Supabase Storage with automatic GitHub Actions ML pipeline triggering. + +**Architecture:** Single unified upload page with a 3-state UI (select → preview → result). Two API routes: `/api/admin/upload/preview` (parse first 10 rows, validate columns) and `/api/admin/upload/commit` (course CSV → Postgres batch-upsert; PDP/AR → Supabase Storage + `repository_dispatch` to GitHub Actions). 
No new DB migrations needed — `course_enrollments` table already exists. + +**Tech Stack:** Next.js 16 App Router, `csv-parse` (streaming CSV), `xlsx` (Excel), `@supabase/supabase-js` (Storage), `pg` (Postgres upsert), GitHub REST API (`repository_dispatch`), TypeScript, Tailwind CSS, shadcn/ui + +--- + +## Task 1: Install `csv-parse` and `xlsx` packages + +**Files:** +- Modify: `codebenders-dashboard/package.json` (via npm install) + +**Step 1: Install packages** + +```bash +cd codebenders-dashboard && npm install csv-parse xlsx +``` + +**Step 2: Verify they appear in `package.json` dependencies** + +```bash +grep -E '"csv-parse"|"xlsx"' package.json +``` + +Expected output: +``` + "csv-parse": "^5.x.x", + "xlsx": "^0.x.x", +``` + +**Step 3: Commit** + +```bash +git add codebenders-dashboard/package.json codebenders-dashboard/package-lock.json +git commit -m "chore: add csv-parse and xlsx packages for file upload" +``` + +--- + +## Task 2: Add role permissions and nav link + +**Files:** +- Modify: `codebenders-dashboard/lib/roles.ts:6-13` +- Modify: `codebenders-dashboard/components/nav-header.tsx:15-20` + +**Step 1: Add `/admin` routes to `ROUTE_PERMISSIONS` in `lib/roles.ts`** + +Open `codebenders-dashboard/lib/roles.ts`. After line 13 (`{ prefix: "/api/query-history/export", ... 
}`), add two new entries so the array looks like: + +```ts +export const ROUTE_PERMISSIONS: Array<{ prefix: string; roles: Role[] }> = [ + { prefix: "/students", roles: ["admin", "advisor", "ir"] }, + { prefix: "/courses", roles: ["admin", "advisor", "ir", "faculty"] }, + { prefix: "/query", roles: ["admin", "advisor", "ir", "faculty"] }, + { prefix: "/api/students", roles: ["admin", "advisor", "ir"] }, + { prefix: "/api/courses", roles: ["admin", "advisor", "ir", "faculty"] }, + { prefix: "/api/query-summary", roles: ["admin", "advisor", "ir", "faculty"] }, + { prefix: "/api/query-history/export", roles: ["admin", "ir"] }, + { prefix: "/admin", roles: ["admin", "ir"] }, + { prefix: "/api/admin", roles: ["admin", "ir"] }, +] +``` + +**Step 2: Add "Upload Data" nav link in `nav-header.tsx`** + +The `NavHeader` component already receives `role` as a prop. Replace the `NAV_LINKS` constant and its usage so the Upload link only renders for admin/ir: + +```tsx +const NAV_LINKS = [ + { href: "/", label: "Dashboard", roles: null }, + { href: "/courses", label: "Courses", roles: null }, + { href: "/students", label: "Students", roles: null }, + { href: "/query", label: "Query", roles: null }, + { href: "/admin/upload", label: "Upload Data", roles: ["admin", "ir"] as Role[] }, +] +``` + +Then update the `nav` block to filter on role: + +```tsx + +``` + +**Step 3: Type-check** + +```bash +cd codebenders-dashboard && npx tsc --noEmit +``` + +Expected: no errors. 
+ +**Step 4: Commit** + +```bash +git add codebenders-dashboard/lib/roles.ts codebenders-dashboard/components/nav-header.tsx +git commit -m "feat: add admin/ir role permissions and Upload Data nav link" +``` + +--- + +## Task 3: Add environment variables + +**Files:** +- Modify: `codebenders-dashboard/env.example` + +**Step 1: Add new env vars to `env.example`** + +Append to the bottom of `codebenders-dashboard/env.example`: + +```bash +# Supabase Storage (for PDP/AR file uploads — use the service role key, not anon) +# Find in Supabase → Project Settings → API → service_role key +SUPABASE_SERVICE_ROLE_KEY=your-service-role-key-here + +# GitHub Actions ML pipeline trigger +# Create a PAT at GitHub → Settings → Developer settings → Personal access tokens +# Required scope: repo (to trigger repository_dispatch) +GITHUB_PAT=ghp_your-personal-access-token-here +# Full repo path: owner/repo +GITHUB_REPO=devcolor/codebenders-datathon +``` + +**Step 2: Add the same vars to your local `.env.local`** + +Copy the three vars above into `codebenders-dashboard/.env.local` with real values. + +**Step 3: Commit** + +```bash +git add codebenders-dashboard/env.example +git commit -m "docs: add env vars for Supabase Storage and GitHub Actions pipeline trigger" +``` + +--- + +## Task 4: Create the preview API route + +**Files:** +- Create: `codebenders-dashboard/app/api/admin/upload/preview/route.ts` + +**Background:** This route accepts a `multipart/form-data` POST with two fields: +- `file` — the uploaded file (File object) +- `fileType` — one of `"course_enrollment"`, `"pdp_cohort"`, `"pdp_ar"` + +It parses the first 50 rows (or all rows if fewer), validates that required columns are present, and returns a preview payload. For `.xlsx` files, it reads the first sheet. For CSV, it uses `csv-parse`. 
+
+**Required columns per file type:**
+- `course_enrollment`: `Student_GUID`, `Course_Prefix`, `Course_Number`, `Academic_Year`, `Academic_Term`
+- `pdp_cohort`: `Institution_ID`, `Cohort`, `Student_GUID`, `Cohort_Term`
+- `pdp_ar`: `Institution_ID`, `Cohort`, `Student_GUID`
+
+**Step 1: Create the route file**
+
+Create `codebenders-dashboard/app/api/admin/upload/preview/route.ts` with this content:
+
+```typescript
+import { type NextRequest, NextResponse } from "next/server"
+import { parse } from "csv-parse/sync"
+import * as XLSX from "xlsx"
+
+const REQUIRED_COLUMNS: Record<string, string[]> = {
+  course_enrollment: ["Student_GUID", "Course_Prefix", "Course_Number", "Academic_Year", "Academic_Term"],
+  pdp_cohort: ["Institution_ID", "Cohort", "Student_GUID", "Cohort_Term"],
+  pdp_ar: ["Institution_ID", "Cohort", "Student_GUID"],
+}
+
+export async function POST(request: NextRequest) {
+  const role = request.headers.get("x-user-role")
+  if (role !== "admin" && role !== "ir") {
+    return NextResponse.json({ error: "Forbidden" }, { status: 403 })
+  }
+
+  let formData: FormData
+  try {
+    formData = await request.formData()
+  } catch {
+    return NextResponse.json({ error: "Invalid multipart form data" }, { status: 400 })
+  }
+
+  const file = formData.get("file") as File | null
+  const fileType = formData.get("fileType") as string | null
+
+  if (!file || !fileType) {
+    return NextResponse.json({ error: "Missing file or fileType" }, { status: 400 })
+  }
+  if (!REQUIRED_COLUMNS[fileType]) {
+    return NextResponse.json({ error: `Unknown fileType: ${fileType}` }, { status: 400 })
+  }
+
+  let rows: Record<string, string>[]
+
+  try {
+    const arrayBuf = await file.arrayBuffer()
+    const buffer = Buffer.from(arrayBuf)
+
+    if (file.name.endsWith(".xlsx")) {
+      const wb = XLSX.read(buffer, { type: "buffer" })
+      const ws = wb.Sheets[wb.SheetNames[0]]
+      rows = XLSX.utils.sheet_to_json<Record<string, string>>(ws, { defval: "" })
+    } else {
+      rows = parse(buffer, {
+        columns: true,
+        skip_empty_lines: true,
+        to: 50,
+        cast: false,
+      }) as Record<string, string>[]
+  } catch (err) {
+    return NextResponse.json(
+      { error: "Failed to parse file", details: err instanceof Error ? err.message : String(err) },
+      { status: 400 }
+    )
+  }
+
+  if (rows.length === 0) {
+    return NextResponse.json({ error: "File is empty" }, { status: 400 })
+  }
+
+  const columns = Object.keys(rows[0])
+  const required = REQUIRED_COLUMNS[fileType]
+  const missing = required.filter(col => !columns.includes(col))
+
+  const warnings: string[] = []
+  if (missing.length > 0) {
+    warnings.push(`Missing required columns: ${missing.join(", ")}`)
+  }
+
+  return NextResponse.json({
+    columns,
+    sampleRows: rows.slice(0, 10),
+    rowCount: rows.length, // parsed-row count (CSV is capped at 50 via `to`; .xlsx reads the full sheet)
+    warnings,
+  })
+}
+```
+
+**Step 2: Type-check**
+
+```bash
+cd codebenders-dashboard && npx tsc --noEmit
+```
+
+Expected: no errors.
+
+**Step 3: Smoke-test with curl** (while `npm run dev` is running)
+
+```bash
+curl -s -X POST http://localhost:3000/api/admin/upload/preview \
+  -H "x-user-role: admin" \
+  -F "fileType=course_enrollment" \
+  -F "file=@../data/bishop_state_courses.csv" | jq '{columns: .columns[:3], rowCount: .rowCount, warnings: .warnings}'
+```
+
+Expected: JSON with `columns` array, `rowCount: 50`, `warnings: []`
+
+**Step 4: Commit**
+
+```bash
+git add codebenders-dashboard/app/api/admin/upload/preview/route.ts
+git commit -m "feat: add POST /api/admin/upload/preview route"
+```
+
+---
+
+## Task 5: Create the commit route — course enrollment path
+
+**Files:**
+- Create: `codebenders-dashboard/app/api/admin/upload/commit/route.ts`
+
+**Background:** For `course_enrollment` file type, stream-parse the full CSV and batch-upsert rows into `public.course_enrollments` in chunks of 500. Use `pg`'s `getPool()` (already available in `lib/db.ts`). The upsert conflict target is `(student_guid, course_prefix, course_number, academic_term)` — you'll need to add a unique constraint migration or use a simpler strategy.
+ +Actually, since the existing load script uses TRUNCATE (not upsert), and there's no unique index on `course_enrollments`, we'll use the same approach: truncate + re-insert. This is idempotent and matches the existing pattern. + +**Column mapping** from CSV header names → DB column names (matches the existing load script at `scripts/load-course-enrollments.ts`): + +| CSV header | DB column | +|---|---| +| Student_GUID | student_guid | +| Cohort | cohort | +| Cohort_Term | cohort_term | +| Academic_Year | academic_year | +| Academic_Term | academic_term | +| Course_Prefix | course_prefix | +| Course_Number | course_number | +| Course_Name | course_name | +| Course_CIP | course_cip | +| Course_Type | course_type | +| Math_or_English_Gateway | gateway_type | +| Co_requisite_Course | is_co_requisite (Y/N → boolean) | +| Core_Course | is_core_course (Y/N → boolean) | +| Core_Course_Type | core_course_type | +| Delivery_Method | delivery_method | +| Grade | grade | +| Number_of_Credits_Attempted | credits_attempted | +| Number_of_Credits_Earned | credits_earned | +| Course_Instructor_Employment_Status | instructor_status | + +**Step 1: Create the commit route file (course enrollment path only)** + +Create `codebenders-dashboard/app/api/admin/upload/commit/route.ts`: + +```typescript +import { type NextRequest, NextResponse } from "next/server" +import { parse } from "csv-parse" +import { Readable } from "stream" +import { getPool } from "@/lib/db" + +const BATCH_SIZE = 500 + +function toBoolean(val: string): boolean | null { + if (val === "Y") return true + if (val === "N") return false + return null +} + +function toNumeric(val: string): number | null { + const t = val.trim() + if (!t || t === "null" || t === "NULL") return null + const n = parseFloat(t) + return isNaN(n) ? null : n +} + +function toNullable(val: string): string | null { + const t = val.trim() + return t === "" ? 
null : t +} + +interface EnrollmentRow { + student_guid: string + cohort: string | null + cohort_term: string | null + academic_year: string | null + academic_term: string | null + course_prefix: string | null + course_number: string | null + course_name: string | null + course_cip: string | null + course_type: string | null + gateway_type: string | null + is_co_requisite: boolean | null + is_core_course: boolean | null + core_course_type: string | null + delivery_method: string | null + grade: string | null + credits_attempted: number | null + credits_earned: number | null + instructor_status: string | null +} + +const COLS = [ + "student_guid", "cohort", "cohort_term", "academic_year", "academic_term", + "course_prefix", "course_number", "course_name", "course_cip", "course_type", + "gateway_type", "is_co_requisite", "is_core_course", "core_course_type", + "delivery_method", "grade", "credits_attempted", "credits_earned", "instructor_status", +] as const + +async function insertBatch(client: import("pg").PoolClient, batch: EnrollmentRow[]): Promise { + if (batch.length === 0) return + const placeholders: string[] = [] + const params: unknown[] = [] + batch.forEach((row, ri) => { + const p = COLS.map((_, ci) => `$${ri * COLS.length + ci + 1}`).join(", ") + placeholders.push(`(${p})`) + COLS.forEach(col => params.push(row[col])) + }) + await client.query( + `INSERT INTO public.course_enrollments (${COLS.join(", ")}) VALUES ${placeholders.join(", ")}`, + params + ) +} + +async function processCourseEnrollment(buffer: Buffer): Promise<{ inserted: number; skipped: number; errors: string[] }> { + const pool = getPool() + const client = await pool.connect() + let inserted = 0 + let skipped = 0 + const errors: string[] = [] + + try { + await client.query("BEGIN") + await client.query("TRUNCATE TABLE public.course_enrollments RESTART IDENTITY") + + const parser = Readable.from(buffer).pipe( + parse({ columns: true, skip_empty_lines: true }) + ) + + let batch: 
EnrollmentRow[] = [] + + for await (const record of parser) { + const r = record as Record + const student_guid = toNullable(r["Student_GUID"] ?? "") + if (!student_guid) { + skipped++ + continue + } + batch.push({ + student_guid, + cohort: toNullable(r["Cohort"] ?? ""), + cohort_term: toNullable(r["Cohort_Term"] ?? ""), + academic_year: toNullable(r["Academic_Year"] ?? ""), + academic_term: toNullable(r["Academic_Term"] ?? ""), + course_prefix: toNullable(r["Course_Prefix"] ?? ""), + course_number: toNullable(r["Course_Number"] ?? ""), + course_name: toNullable(r["Course_Name"] ?? ""), + course_cip: toNullable(r["Course_CIP"] ?? ""), + course_type: toNullable(r["Course_Type"] ?? ""), + gateway_type: toNullable(r["Math_or_English_Gateway"] ?? ""), + is_co_requisite: toBoolean(r["Co_requisite_Course"] ?? ""), + is_core_course: toBoolean(r["Core_Course"] ?? ""), + core_course_type: toNullable(r["Core_Course_Type"] ?? ""), + delivery_method: toNullable(r["Delivery_Method"] ?? ""), + grade: toNullable(r["Grade"] ?? ""), + credits_attempted: toNumeric(r["Number_of_Credits_Attempted"] ?? ""), + credits_earned: toNumeric(r["Number_of_Credits_Earned"] ?? ""), + instructor_status: toNullable(r["Course_Instructor_Employment_Status"] ?? ""), + }) + inserted++ + if (batch.length >= BATCH_SIZE) { + await insertBatch(client, batch) + batch = [] + } + } + + if (batch.length > 0) await insertBatch(client, batch) + await client.query("COMMIT") + } catch (err) { + await client.query("ROLLBACK") + errors.push(err instanceof Error ? 
err.message : String(err)) + inserted = 0 + } finally { + client.release() + } + + return { inserted, skipped, errors } +} + +export async function POST(request: NextRequest) { + const role = request.headers.get("x-user-role") + if (role !== "admin" && role !== "ir") { + return NextResponse.json({ error: "Forbidden" }, { status: 403 }) + } + + let formData: FormData + try { + formData = await request.formData() + } catch { + return NextResponse.json({ error: "Invalid multipart form data" }, { status: 400 }) + } + + const file = formData.get("file") as File | null + const fileType = formData.get("fileType") as string | null + + if (!file || !fileType) { + return NextResponse.json({ error: "Missing file or fileType" }, { status: 400 }) + } + + const buffer = Buffer.from(await file.arrayBuffer()) + + if (fileType === "course_enrollment") { + const result = await processCourseEnrollment(buffer) + return NextResponse.json(result) + } + + // PDP/AR path — placeholder, implemented in Task 6 + return NextResponse.json({ error: `fileType "${fileType}" not yet implemented` }, { status: 501 }) +} +``` + +**Step 2: Type-check** + +```bash +cd codebenders-dashboard && npx tsc --noEmit +``` + +Expected: no errors. + +**Step 3: Smoke-test with curl** (while `npm run dev` is running) + +```bash +curl -s -X POST http://localhost:3000/api/admin/upload/commit \ + -H "x-user-role: admin" \ + -F "fileType=course_enrollment" \ + -F "file=@../data/bishop_state_courses.csv" | jq . 
+```
+
+Expected: `{"inserted": <row-count>, "skipped": 0, "errors": []}`
+
+**Step 4: Commit**
+
+```bash
+git add codebenders-dashboard/app/api/admin/upload/commit/route.ts
+git commit -m "feat: add POST /api/admin/upload/commit — course enrollment truncate-and-reload path"
+```
+
+---
+
+## Task 6: Extend commit route — PDP/AR path (Supabase Storage + GitHub dispatch)
+
+**Files:**
+- Modify: `codebenders-dashboard/app/api/admin/upload/commit/route.ts`
+
+**Background:** For `pdp_cohort` and `pdp_ar` file types, the commit route:
+1. Creates a Supabase service-role client (uses `SUPABASE_SERVICE_ROLE_KEY`)
+2. Uploads the file to the `pdp-uploads` Storage bucket with path `<file-type>/<timestamp>-<file-name>`
+3. Calls the GitHub `repository_dispatch` API with `GITHUB_PAT` and `GITHUB_REPO` env vars
+4. Returns `{ status: "processing", storageKey, actionsUrl }`
+
+**Before this task:** Create the `pdp-uploads` bucket in your Supabase dashboard:
+- Supabase → Storage → New bucket → name: `pdp-uploads` → Private
+
+**Step 1: Add the PDP/AR handler to the commit route**
+
+In `codebenders-dashboard/app/api/admin/upload/commit/route.ts`, add these imports at the top:
+
+```typescript
+import { createClient } from "@supabase/supabase-js"
+```
+
+Add this function before the `POST` handler:
+
+```typescript
+async function processPdpFile(
+  buffer: Buffer,
+  fileName: string,
+  fileType: string,
+): Promise<{ status: string; storageKey: string; actionsUrl: string }> {
+  const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL
+  const serviceKey = process.env.SUPABASE_SERVICE_ROLE_KEY
+  const githubPat = process.env.GITHUB_PAT
+  const githubRepo = process.env.GITHUB_REPO
+
+  if (!supabaseUrl || !serviceKey) throw new Error("Missing NEXT_PUBLIC_SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY")
+  if (!githubPat || !githubRepo) throw new Error("Missing GITHUB_PAT or GITHUB_REPO")
+
+  // 1. Upload to Supabase Storage
+  const supabase = createClient(supabaseUrl, serviceKey)
+  const storageKey = `${fileType}/${Date.now()}-${fileName}`
+  const { error: uploadError } = await supabase.storage
+    .from("pdp-uploads")
+    .upload(storageKey, buffer, { contentType: "application/octet-stream", upsert: false })
+
+  if (uploadError) throw new Error(`Storage upload failed: ${uploadError.message}`)
+
+  // 2. Trigger GitHub Actions via repository_dispatch
+  const dispatchRes = await fetch(
+    `https://api.github.com/repos/${githubRepo}/dispatches`,
+    {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${githubPat}`,
+        Accept: "application/vnd.github+json",
+        "X-GitHub-Api-Version": "2022-11-28",
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        event_type: "ml-pipeline",
+        client_payload: { storage_key: storageKey, file_type: fileType },
+      }),
+    }
+  )
+
+  if (!dispatchRes.ok) {
+    const body = await dispatchRes.text()
+    throw new Error(`GitHub dispatch failed (${dispatchRes.status}): ${body}`)
+  }
+
+  const actionsUrl = `https://github.com/${githubRepo}/actions`
+  return { status: "processing", storageKey, actionsUrl }
+}
+```
+
+Replace the placeholder in the `POST` handler at the bottom:
+
+```typescript
+  if (fileType === "pdp_cohort" || fileType === "pdp_ar") {
+    try {
+      const result = await processPdpFile(buffer, file.name, fileType)
+      return NextResponse.json(result)
+    } catch (err) {
+      return NextResponse.json(
+        { error: err instanceof Error ? err.message : String(err) },
+        { status: 500 }
+      )
+    }
+  }
+
+  return NextResponse.json({ error: `Unknown fileType: ${fileType}` }, { status: 400 })
+```
+
+**Step 2: Type-check**
+
+```bash
+cd codebenders-dashboard && npx tsc --noEmit
+```
+
+Expected: no errors.
+ +**Step 3: Commit** + +```bash +git add codebenders-dashboard/app/api/admin/upload/commit/route.ts +git commit -m "feat: extend commit route with PDP/AR → Supabase Storage + GitHub Actions dispatch" +``` + +--- + +## Task 7: Create GitHub Actions ML pipeline workflow + +**Files:** +- Create: `.github/workflows/ml-pipeline.yml` + +**Background:** This workflow fires on `repository_dispatch` with `event_type: ml-pipeline`. It: +1. Downloads the uploaded file from Supabase Storage using a signed URL +2. Determines the target data file path from `file_type` in the payload +3. Replaces the appropriate file in `data/` with the uploaded one +4. Runs the Python ML pipeline +5. Uploads `ML_PIPELINE_REPORT.txt` as an artifact + +**Required GitHub Actions secrets** (set at repo level: Settings → Secrets → Actions): +- `SUPABASE_URL` — your Supabase project URL +- `SUPABASE_SERVICE_ROLE_KEY` — service role key for Storage access +- `DB_HOST`, `DB_USER`, `DB_PASSWORD`, `DB_PORT`, `DB_NAME`, `DB_SSL` — Postgres credentials + +**Step 1: Create the workflow file** + +Create `.github/workflows/ml-pipeline.yml`: + +```yaml +name: ML Pipeline + +on: + repository_dispatch: + types: [ml-pipeline] + +jobs: + run-pipeline: + name: Download data file and run ML pipeline + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Create virtualenv and install dependencies + run: | + python -m venv venv + venv/bin/pip install --upgrade pip + venv/bin/pip install -r requirements.txt + + - name: Download uploaded file from Supabase Storage + env: + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} + STORAGE_KEY: ${{ github.event.client_payload.storage_key }} + FILE_TYPE: ${{ github.event.client_payload.file_type }} + run: | + python - <<'EOF' + import os, urllib.request, json + + url = os.environ["SUPABASE_URL"] + key 
= os.environ["SUPABASE_SERVICE_ROLE_KEY"] + storage_key = os.environ["STORAGE_KEY"] + file_type = os.environ["FILE_TYPE"] + + # Get a signed download URL via Supabase Storage REST API + sign_url = f"{url}/storage/v1/object/sign/pdp-uploads/{storage_key}" + req = urllib.request.Request( + sign_url, + data=json.dumps({"expiresIn": 600}).encode(), + headers={ + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + "apikey": key, + }, + method="POST", + ) + with urllib.request.urlopen(req) as resp: + signed = json.loads(resp.read()) + signed_url = f"{url}/storage/v1{signed['signedURL']}" + + # Determine destination path + dest = { + "pdp_cohort": "data/bishop_state_cohorts_with_zip.csv", + "pdp_ar": "data/ar_bscc_with_zip.csv", + }.get(file_type) + if not dest: + raise ValueError(f"Unknown file_type: {file_type}") + + print(f"Downloading to {dest}...") + urllib.request.urlretrieve(signed_url, dest) + print("Download complete.") + EOF + + - name: Run ML pipeline + env: + DB_HOST: ${{ secrets.DB_HOST }} + DB_USER: ${{ secrets.DB_USER }} + DB_PASSWORD: ${{ secrets.DB_PASSWORD }} + DB_PORT: ${{ secrets.DB_PORT }} + DB_NAME: ${{ secrets.DB_NAME }} + DB_SSL: ${{ secrets.DB_SSL }} + run: | + venv/bin/python ai_model/complete_ml_pipeline.py + + - name: Upload ML pipeline report + uses: actions/upload-artifact@v4 + if: always() + with: + name: ml-pipeline-report-${{ github.run_id }} + path: ML_PIPELINE_REPORT.txt + retention-days: 90 +``` + +**Step 2: Commit** + +```bash +git add .github/workflows/ml-pipeline.yml +git commit -m "feat: add GitHub Actions ML pipeline workflow triggered by repository_dispatch" +``` + +--- + +## Task 8: Create the upload page UI + +**Files:** +- Create: `codebenders-dashboard/app/admin/upload/page.tsx` + +**Background:** This is a client component (`"use client"`) with three local state phases: `idle` (file selection), `preview` (showing sample rows + warnings), and `result` (showing outcome). 
It uses `fetch` to call the two API routes. Drag-and-drop is implemented with native HTML5 `onDrop` / `onDragOver` events. + +**Step 1: Create the page file** + +Create `codebenders-dashboard/app/admin/upload/page.tsx`: + +```tsx +"use client" + +import { useState, useCallback } from "react" +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" +import { Button } from "@/components/ui/button" +import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table" +import { Upload, AlertCircle, CheckCircle2, Loader2 } from "lucide-react" + +type FileType = "course_enrollment" | "pdp_cohort" | "pdp_ar" +type Phase = "idle" | "previewing" | "preview" | "committing" | "result" + +interface PreviewData { + columns: string[] + sampleRows: Record[] + rowCount: number + warnings: string[] +} + +interface CommitResult { + // Course enrollment + inserted?: number + skipped?: number + errors?: string[] + // PDP/AR + status?: string + storageKey?: string + actionsUrl?: string + error?: string +} + +const FILE_TYPE_LABELS: Record = { + course_enrollment: "Course Enrollment CSV", + pdp_cohort: "PDP Cohort CSV", + pdp_ar: "PDP AR File (.xlsx)", +} + +const FILE_TYPE_ACCEPT: Record = { + course_enrollment: ".csv", + pdp_cohort: ".csv", + pdp_ar: ".csv,.xlsx", +} + +export default function UploadPage() { + const [fileType, setFileType] = useState("course_enrollment") + const [file, setFile] = useState(null) + const [phase, setPhase] = useState("idle") + const [preview, setPreview] = useState(null) + const [result, setResult] = useState(null) + const [dragOver, setDragOver] = useState(false) + const [errorMsg, setErrorMsg] = useState(null) + + const handleFile = useCallback((f: File) => { + setFile(f) + setErrorMsg(null) + setPhase("idle") + setPreview(null) + setResult(null) + }, []) + + const handleDrop = useCallback((e: React.DragEvent) => { + e.preventDefault() + setDragOver(false) + const dropped = 
e.dataTransfer.files[0] + if (dropped) handleFile(dropped) + }, [handleFile]) + + const handlePreview = async () => { + if (!file) return + setPhase("previewing") + setErrorMsg(null) + const fd = new FormData() + fd.append("file", file) + fd.append("fileType", fileType) + try { + const res = await fetch("/api/admin/upload/preview", { method: "POST", body: fd }) + const data = await res.json() + if (!res.ok) { setErrorMsg(data.error ?? "Preview failed"); setPhase("idle"); return } + setPreview(data as PreviewData) + setPhase("preview") + } catch (err) { + setErrorMsg(err instanceof Error ? err.message : "Network error") + setPhase("idle") + } + } + + const handleCommit = async () => { + if (!file) return + setPhase("committing") + setErrorMsg(null) + const fd = new FormData() + fd.append("file", file) + fd.append("fileType", fileType) + try { + const res = await fetch("/api/admin/upload/commit", { method: "POST", body: fd }) + const data = await res.json() + if (!res.ok) { setErrorMsg(data.error ?? "Upload failed"); setPhase("preview"); return } + setResult(data as CommitResult) + setPhase("result") + } catch (err) { + setErrorMsg(err instanceof Error ? err.message : "Network error") + setPhase("preview") + } + } + + const reset = () => { + setFile(null) + setPhase("idle") + setPreview(null) + setResult(null) + setErrorMsg(null) + } + + return ( +
+
+

Upload Data

+

+ Import course enrollment CSVs or PDP/AR files. Admin and IR only. +

+
+ + {/* ── Phase: idle / selecting ── */} + {(phase === "idle" || phase === "previewing") && ( + + + Select File + Choose a file type, then drop or pick your file. + + + {/* File type selector */} +
+ {(Object.keys(FILE_TYPE_LABELS) as FileType[]).map(ft => ( + + ))} +
+ + {/* Drop zone */} + + + {errorMsg && ( +
+ + {errorMsg} +
+ )} + + +
+
+ )} + + {/* ── Phase: preview ── */} + {(phase === "preview" || phase === "committing") && preview && ( + + + Preview — {FILE_TYPE_LABELS[fileType]} + + {file?.name} · {preview.rowCount} rows parsed + + + + {preview.warnings.length > 0 && ( +
+ {preview.warnings.map((w, i) => ( +
+ + {w} +
+ ))} +
+ )} + +
+ + + + {preview.columns.slice(0, 8).map(col => ( + {col} + ))} + {preview.columns.length > 8 && +{preview.columns.length - 8} more} + + + + {preview.sampleRows.map((row, i) => ( + + {preview.columns.slice(0, 8).map(col => ( + {String(row[col] ?? "")} + ))} + {preview.columns.length > 8 && } + + ))} + +
+
+ + {errorMsg && ( +
+ + {errorMsg} +
+ )} + +
+ + +
+
+
+ )} + + {/* ── Phase: result ── */} + {phase === "result" && result && ( + + + + + Upload Complete + + + + {result.inserted !== undefined && ( +
+

{result.inserted.toLocaleString()} rows inserted

+ {(result.skipped ?? 0) > 0 &&

{result.skipped} rows skipped (missing Student_GUID)

} + {result.errors && result.errors.length > 0 && ( +
+ {result.errors.map((e, i) =>

{e}

)} +
+ )} +
+ )} + {result.status === "processing" && ( +
+

File saved to Supabase Storage. The ML pipeline has been queued in GitHub Actions.

+ {result.actionsUrl && ( + + View pipeline run on GitHub Actions → + + )} +
+ )} + {result.error && ( +
+ {result.error} +
+ )} + +
+
+ )} +
+ ) +} +``` + +**Step 2: Type-check** + +```bash +cd codebenders-dashboard && npx tsc --noEmit +``` + +Expected: no errors. + +**Step 3: Visual check** (while `npm run dev` is running) + +- Log in as an admin or IR user +- Navigate to `/admin/upload` +- Verify "Upload Data" appears in the nav +- Try dragging and dropping `data/bishop_state_courses.csv` +- Verify the preview table shows first 10 rows +- Verify "Confirm & Upload" runs and returns a result + +**Step 4: Commit** + +```bash +git add codebenders-dashboard/app/admin/upload/page.tsx +git commit -m "feat: add /admin/upload page with drag-drop, preview, and commit UI" +``` + +--- + +## Task 9: Final type-check, lint, and push + +**Step 1: Full type-check + lint** + +```bash +cd codebenders-dashboard && npx tsc --noEmit && npm run lint +``` + +Expected: 0 errors, 0 warnings (or only pre-existing warnings). + +**Step 2: Push and open PR** + +```bash +git push origin +gh pr create \ + --title "feat: self-service data upload for course and PDP/AR files (#86)" \ + --body "Closes #86 + +## Summary +- \`/admin/upload\` page (admin/ir only) with drag-drop, preview, and commit +- Course enrollment CSVs stream-parsed and batch-upserted into \`course_enrollments\` Postgres table +- PDP cohort CSVs and AR .xlsx files uploaded to Supabase Storage \`pdp-uploads\` bucket +- GitHub Actions workflow \`ml-pipeline.yml\` triggered via \`repository_dispatch\` after PDP/AR upload + +## New env vars required (see env.example) +- \`SUPABASE_SERVICE_ROLE_KEY\` +- \`GITHUB_PAT\` +- \`GITHUB_REPO\` + +## GitHub Actions secrets required +- \`SUPABASE_URL\`, \`SUPABASE_SERVICE_ROLE_KEY\`, \`DB_HOST\`, \`DB_USER\`, \`DB_PASSWORD\`, \`DB_PORT\`, \`DB_NAME\`, \`DB_SSL\` + +## Test plan +- [ ] Admin/IR can access \`/admin/upload\`; other roles get redirected +- [ ] Upload Data nav link visible to admin/IR only +- [ ] Course enrollment CSV preview shows first 10 rows with correct columns +- [ ] Course enrollment commit inserts rows into 
\`course_enrollments\` table +- [ ] PDP cohort CSV commit uploads to Supabase Storage and returns \`status: processing\` +- [ ] \`npx tsc --noEmit\` passes with 0 errors +" +``` From cb20f2d6ec0c2de3a0c48fc8d7d203c3f6898a6a Mon Sep 17 00:00:00 2001 From: William Hill Date: Fri, 27 Mar 2026 21:13:41 -0400 Subject: [PATCH 03/15] docs: design spec for config-driven distillation pipeline Per-school fine-tuning pipeline to replace OpenAI dependency for explanation and summarization endpoints with locally-served Qwen 3.5 models via MLX and Ollama. --- ...2026-03-27-distillation-pipeline-design.md | 550 ++++++++++++++++++ 1 file changed, 550 insertions(+) create mode 100644 docs/superpowers/specs/2026-03-27-distillation-pipeline-design.md diff --git a/docs/superpowers/specs/2026-03-27-distillation-pipeline-design.md b/docs/superpowers/specs/2026-03-27-distillation-pipeline-design.md new file mode 100644 index 0000000..761c553 --- /dev/null +++ b/docs/superpowers/specs/2026-03-27-distillation-pipeline-design.md @@ -0,0 +1,550 @@ +# Config-Driven Distillation Pipeline for Per-School Fine-Tuned Models + +**Date:** 2026-03-27 +**Status:** Draft +**Goal:** Replace OpenAI API dependency for explanation and summarization endpoints with locally-served, per-school fine-tuned models via a repeatable, config-driven training pipeline. + +--- + +## 1. Problem + +The dashboard currently calls OpenAI GPT-4o-mini in two explanation/summarization endpoints: + +- `/api/courses/explain-pairing` — course pairing explanations +- `/api/query-summary` — query result summaries + +This creates per-call API costs, latency, and a dependency on an external service. The explanations are also generic — they lack institutional context about each school's programs, challenges, demographics, and interventions. + +## 2. Solution + +A config-driven distillation pipeline that: + +1. Takes a per-school YAML config describing the school's schema, domain knowledge, and context +2. 
Uses a teacher model (Claude Sonnet or Qwen 3.5 locally) to generate high-quality training pairs +3. Fine-tunes a small open-source model (Qwen 3.5 4B or 9B) via MLX on Apple Silicon +4. Evaluates the model against ship criteria +5. Exports to Ollama for local serving + +New school = new config file + run the pipeline. No code changes needed. + +## 3. Architecture + +### Directory Structure + +``` +schools/ + bishop-state/ + config.yaml # Schema, domain knowledge, explanation style + seed_queries.yaml # Example questions users ask at this school + akron/ + config.yaml + seed_queries.yaml + +training/ + distill.py # Step 1: Generate training pairs via teacher model + prepare.py # Step 2: Filter, dedup, split (80/10/10) + finetune.py # Step 3: Fine-tune via MLX (Qwen 3.5) + eval.py # Step 4: Evaluate model quality + export.py # Step 5: Package for Ollama + config.py # Shared constants + prompts.py # Teacher prompts (school-agnostic templates) + +training_data/ + bishop-state/ + pairs/ # Raw distilled pairs (explainer.jsonl, summarizer.jsonl) + final/ # Train/val/test splits per adapter + models/ # Fine-tuned LoRA adapters + qwen3.5-9b/ + explainer/ + adapter_config.json + adapter_model.safetensors + summarizer/ + adapter_config.json + adapter_model.safetensors +``` + +### CLI + +```bash +python -m training.distill --school bishop-state [--local] # Generate pairs +python -m training.prepare --school bishop-state # Filter/split +python -m training.finetune --school bishop-state --model 9b # Train +python -m training.eval --school bishop-state # Evaluate +python -m training.export --school bishop-state # Deploy to Ollama +``` + +## 4. School Config Format + +Each school gets a `config.yaml` capturing everything the pipeline needs. 
Sections: + +### Core Identity + +```yaml +school: + name: "Bishop State Community College" + code: "bscc" + type: "community_college" + designation: ["hbcu", "minority_serving"] + accreditation: "SACSCOC" + founded: 1927 +``` + +### Location and Setting + +```yaml + location: + address: "351 North Broad Street" + city: "Mobile" + state: "Alabama" + zip: "36603" + county: "Mobile County" + region: "Gulf Coast" + setting: "urban" + climate_zone: "subtropical" +``` + +### Enrollment Profile + +```yaml + enrollment: + total_headcount: 4200 + fte: 2800 + undergraduate_only: true + residential: false + percent_full_time: 0.42 + percent_part_time: 0.58 + percent_online: 0.35 + open_admission: true +``` + +### Demographics + +```yaml + demographics: + percent_black: 0.72 + percent_white: 0.18 + percent_hispanic: 0.05 + percent_other: 0.05 + percent_pell_eligible: 0.68 + percent_first_gen: 0.55 + percent_adult_learners: 0.40 + median_household_income_area: 42000 +``` + +### Database Schema + +```yaml +database: + main_table: "student_level_with_predictions" + course_table: "course_enrollments" + connection_env: "DATABASE_URL" + +schema: + student_columns: + Cohort: "Cohort year (numeric: 2019, 2020, etc.)" + Race: "Student race/ethnicity" + Gender: "Student gender" + Retention: "Retention indicator (0 or 1)" + GPA_Group_Year_1: "GPA in year 1" + # ... full column list from route.ts SCHEMA_INFO + course_columns: + course_prefix: "Course dept code (MAT, ENG, NUR, etc.)" + grade: "Student grade (A, B, C, D, F, W, I)" + # ... 
full column list +``` + +### Domain Knowledge + +```yaml +domain: + programs: + - name: "Nursing (ADN)" + cip: "51.3801" + gateway_courses: ["BIO 201", "MAT 110"] + - name: "Welding Technology" + cip: "48.0508" + gateway_courses: ["WDT 108", "WDT 109"] + key_metrics: ["retention_rate", "dfwi_rate", "gateway_pass_rate"] + terminology: + credential: "associate degree or certificate" + at_risk: "students flagged by early warning system" +``` + +### Workforce and Outcomes + +```yaml + workforce: + top_employers: ["Austal USA", "Mobile Infirmary", "AM/NS Calvert"] + high_demand_fields: ["healthcare", "advanced_manufacturing", "maritime"] + + outcomes: + job_placement_rate_6mo: 0.78 + median_salary_after_credential: + associate: 34000 + certificate: 29000 + licensure_pass_rates: + nursing_nclex: 0.89 + welding_aws: 0.92 +``` + +### Peer Context + +```yaml + peers: + ipeds_id: "101505" + carnegie_class: "Associate's—High Transfer-High Traditional" + peer_institutions: ["Lawson State CC", "Shelton State CC"] + state_system: "Alabama Community College System" +``` + +### Financial Context + +```yaml + financial: + in_state_tuition: 4800 + avg_financial_aid_package: 5200 + percent_receiving_aid: 0.82 + percent_student_loans: 0.25 + cost_of_living_index: 87.3 + emergency_aid_fund: true +``` + +### Completion Context + +```yaml + completion: + ipeds_graduation_rate: 0.18 + adjusted_completion_rate: 0.42 + avg_time_to_credential: 3.2 + percent_transfer_out: 0.24 + percent_stop_out_return: 0.15 + top_completion_barriers: + - "developmental_math_sequences" + - "financial_emergencies" + - "work_schedule_conflicts" +``` + +### Faculty and Instruction + +```yaml + instruction: + student_faculty_ratio: 18 + percent_full_time_faculty: 0.45 + percent_adjunct: 0.55 + developmental_ed_model: "corequisite" +``` + +### Student Pipeline + +```yaml + pipeline: + feeder_high_schools: + - name: "Williamson High School" + percent_of_enrollment: 0.12 + avg_readiness: "below_college_level" + 
percent_ged: 0.11 + percent_veterans: 0.07 + percent_career_changers: 0.14 + primary_recruitment_radius_miles: 35 +``` + +### Digital Access + +```yaml + technology: + percent_students_with_reliable_wifi: 0.71 + percent_students_with_personal_laptop: 0.64 + campus_device_lending: true + broadband_desert_overlap: true +``` + +### Transportation and Access + +```yaml + access: + campus_count: 4 + campuses: + - name: "Main Campus" + public_transit_accessible: true + - name: "Southwest Campus" + public_transit_accessible: false + percent_students_commute_30_plus_min: 0.35 + evening_weekend_classes: true +``` + +### Equity Gaps and Initiatives + +```yaml + equity: + known_gaps: + - metric: "gateway_math_pass_rate" + group_a: { name: "Black male students", value: 0.41 } + group_b: { name: "Overall", value: 0.58 } + initiative: "Male Student Success mentoring program" + minority_male_initiative: "Brother 2 Brother" +``` + +### Active Interventions + +```yaml + interventions: + active: + - name: "Starfish Early Alert" + type: "early_warning" + target: "all students" + trigger: "missed 2+ classes or below C at midterm" + effectiveness: "12% retention lift in pilot cohorts" + - name: "Emergency Micro-Grants" + type: "financial" + max_award: 500 + effectiveness: "78% of recipients re-enrolled next term" +``` + +### Student Life + +```yaml + student_life: + percent_working_while_enrolled: 0.72 + percent_working_over_20hrs: 0.48 + percent_single_parents: 0.18 + food_insecurity_rate: 0.31 + housing_insecurity_rate: 0.14 +``` + +### Community Health Context + +```yaml + health: + mental_health_counselor_ratio: "1:1400" + community_health_context: + - "Mobile County has highest diabetes rate in Alabama" + - "Limited mental health providers in service area" +``` + +### Seasonal Patterns + +```yaml + patterns: + high_attrition_points: + - week: 4 + reason: "Financial aid disbursement delays" + - week: 8 + reason: "Midterm performance shock" + - month: "October" + reason: "Hurricane 
season peak" + summer_melt_rate: 0.22 +``` + +### Historical Trends + +```yaml + trends: + enrollment_direction: "declining" + enrollment_5yr_change: -0.12 + completion_direction: "improving" + notable_changes: + - year: 2022 + event: "Switched to corequisite math model" + - year: 2023 + event: "Launched early alert system with ML predictions" +``` + +### Institutional Priorities + +```yaml + priorities: + strategic_plan_years: "2024-2029" + top_goals: + - "Increase fall-to-fall retention from 42% to 55%" + - "Launch 3 new short-term workforce certificates" + - "Close equity gap in gateway math by 50%" + accreditation_qep_topic: "Guided Pathways implementation" + grant_funded_initiatives: + - name: "Title III Strengthening Institutions" + focus: "Student support services and advising redesign" + end_date: "2027-09-30" +``` + +### Data Quality Notes + +```yaml + data_caveats: + - "Pre-2020 cohorts lack online/hybrid delivery classification" + - "Race/ethnicity is self-reported; 6% of records are 'Unknown'" + - "Transfer-out data relies on NSC match — ~85% match rate" +``` + +### Distillation and Training Config + +```yaml +distillation: + teacher_model: "claude-sonnet-4-20250514" + teacher_backend: "anthropic" + local_teacher_model: "qwen3.5:27b" + local_teacher_backend: "ollama" + pairs_per_task: 1500 + +training: + default_model: "qwen3.5:9b" + fallback_model: "qwen3.5:4b" + method: "qlora" + quantization: 4 + lora_rank: 16 + lora_alpha: 32 + epochs: 3 + learning_rate: 1.0e-4 + batch_size: 4 + warmup_steps: 100 + eval_every: 50 + early_stopping_patience: 3 +``` + +## 5. 
Distillation — Teacher Prompts and Pair Generation + +### Two Adapters + +| Adapter | Replaces | Input | Output | +|---------|----------|-------|--------| +| **Explainer** | `/api/courses/explain-pairing` | Course pairing data | Structured explanation JSON | +| **Summarizer** | `/api/query-summary` | Query + result rows | Structured summary JSON | + +### Teacher Prompt Strategy + +**Explainer teacher prompt:** + +The teacher model receives the full institutional context from config.yaml plus the course pairing data, and generates: + +```json +{ + "explanation": "2-3 sentence plain-language explanation", + "structural_factors": ["institutional/systemic factors"], + "student_impact": "what this means for students", + "advisor_recommendation": "actionable next step", + "data_limitations": ["caveats about this data"], + "related_intervention": "existing program that addresses this, or null" +} +``` + +**Summarizer teacher prompt:** + +The teacher receives institutional context plus the original query and SQL result rows, and generates: + +```json +{ + "summary": "2-3 sentence headline finding", + "key_insights": ["notable patterns"], + "context": "how this connects to institutional priorities or known challenges", + "action_items": ["what someone should do with this information"], + "caveats": ["data limitations relevant to this query"] +} +``` + +**Student prompts** (what the fine-tuned model sees at inference) are minimal — just the data input. All institutional context is baked into the weights during training. + +### Dual Teacher Support + +- **`--local` flag:** Uses Qwen 3.5 27B via Ollama for free iteration and pipeline testing +- **Default:** Uses Claude Sonnet via Anthropic API for production-quality training data + +### Seed Data Sources + +1. **Database-driven (500 pairs per adapter):** Query the school's actual data for real course pairings and result sets +2. 
**Template-driven (500 pairs per adapter):** From `seed_queries.yaml` with school-specific examples +3. **Synthetic variation (500 pairs per adapter):** Pipeline varies dimensions (cohorts, programs, demographics) to reach 1,500 pairs per adapter + +**Total per school:** 3,000 training pairs. Distillation cost via Claude Sonnet: ~$15-25. + +## 6. Fine-Tuning + +### Method + +QLoRA via Apple MLX framework on Apple Silicon Macs. + +- Base model: Qwen 3.5 9B (default) or 4B (lightweight) +- 4-bit quantized base, trainable low-rank adapters +- Two separate adapters per school (explainer + summarizer) on the same base model + +### Hardware Requirements + +| Model | Training | Inference | +|-------|----------|-----------| +| Qwen 3.5 9B | 24GB+ RAM (M-series Mac) | 8GB+ RAM (Q4 via Ollama) | +| Qwen 3.5 4B | 16GB+ RAM (M-series Mac) | 4GB+ RAM (Q4 via Ollama) | + +### Training Time Estimates (3,000 examples, 3 epochs) + +| Model | 18GB Mac (M3 Pro) | 36GB Mac (M3 Pro) | +|-------|-------------------|-------------------| +| Qwen 3.5 4B | ~2-4 hrs | ~1.5-3 hrs | +| Qwen 3.5 9B | Tight, not recommended | ~3-5 hrs | + +## 7. Evaluation + +### Ship Criteria + +| Metric | What It Checks | Threshold | +|--------|---------------|-----------| +| JSON validity | Output parses as valid JSON | >= 95% | +| Schema adherence | All required keys present, correct types | >= 90% | +| Explanation quality | ROUGE-L against teacher outputs | >= 0.35 | +| Factual grounding | Mentions data values from input, not hallucinated | >= 85% | +| Actionability | Recommendations are non-generic | >= 80% | +| Caveat inclusion | Data limitations populated | >= 90% | + +Pipeline refuses to export a model that fails any threshold. + +## 8. 
Deployment + +### Export to Ollama + +```bash +python -m training.export --school bishop-state +# Registers: +# bishop-state-explainer:9b +# bishop-state-summarizer:9b +``` + +### Dashboard Integration + +A thin adapter layer in `lib/model-client.ts` routes to the appropriate backend: + +``` +MODEL_BACKEND=ollama → local fine-tuned model via Ollama +MODEL_BACKEND=openai → fallback to OpenAI GPT-4o-mini +SCHOOL_CODE=bishop-state +``` + +Routes affected: + +| Route | Current | After | +|-------|---------|-------| +| `/api/courses/explain-pairing` | OpenAI GPT-4o-mini | `bishop-state-explainer:9b` via Ollama | +| `/api/query-summary` | OpenAI GPT-4o-mini | `bishop-state-summarizer:9b` via Ollama | +| `/api/analyze` | OpenAI GPT-4o-mini | No change (future adapter) | + +## 9. Onboarding a New School + +1. Create `schools/{school-code}/config.yaml` — fill in institutional context +2. Create `schools/{school-code}/seed_queries.yaml` — 20-50 example questions +3. Run the pipeline: + ```bash + python -m training.distill --school {school-code} [--local] + python -m training.prepare --school {school-code} + python -m training.finetune --school {school-code} --model 9b + python -m training.eval --school {school-code} + python -m training.export --school {school-code} + ``` +4. Set env vars: `MODEL_BACKEND=ollama SCHOOL_CODE={school-code}` +5. Deploy dashboard + +## 10. Cost Summary + +| Item | Per School | One-Time | +|------|-----------|----------| +| Distillation (Claude Sonnet) | $15-25 | - | +| Distillation (local Qwen) | $0 | - | +| Fine-tuning (MLX on Mac) | $0 (electricity) | - | +| Inference (Ollama) | $0 | - | +| Base model download | - | ~6GB (cached) | + +**Total cost to onboard a new school: $15-25** (or $0 with local teacher). 
From ae971e4b27fc81e2318a4cec8486ef19191c14c4 Mon Sep 17 00:00:00 2001 From: William Hill Date: Fri, 27 Mar 2026 21:33:54 -0400 Subject: [PATCH 04/15] docs: implementation plan for config-driven distillation pipeline --- .../plans/2026-03-27-distillation-pipeline.md | 3805 +++++++++++++++++ 1 file changed, 3805 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-27-distillation-pipeline.md diff --git a/docs/superpowers/plans/2026-03-27-distillation-pipeline.md b/docs/superpowers/plans/2026-03-27-distillation-pipeline.md new file mode 100644 index 0000000..d492ae0 --- /dev/null +++ b/docs/superpowers/plans/2026-03-27-distillation-pipeline.md @@ -0,0 +1,3805 @@ +# Distillation Pipeline Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build a config-driven pipeline that distills a teacher model's knowledge into per-school fine-tuned Qwen 3.5 models, replacing OpenAI API calls for course explanations and query summaries. + +**Architecture:** A 5-stage Python pipeline (distill → prepare → finetune → eval → export) reads per-school YAML configs, generates ChatML training pairs via Claude Sonnet or local Qwen 3.5, fine-tunes via MLX QLoRA, evaluates against ship criteria, and exports to Ollama. The Next.js dashboard swaps OpenAI calls for local Ollama inference via a thin model-client adapter. + +**Tech Stack:** Python 3.8+, PyYAML, Anthropic SDK, ollama (Python client), MLX/mlx-lm (Apple Silicon fine-tuning), pytest, Next.js/TypeScript (dashboard integration) + +**Spec:** `docs/superpowers/specs/2026-03-27-distillation-pipeline-design.md` + +**Reference implementation:** `~/Development/d4bl_ai_agent/scripts/training/` — the d4bl pipeline this adapts from. 
+ +--- + +## File Structure + +### New Files + +``` +training/ + __init__.py # Package init + config.py # Constants + YAML config loader + prompts.py # Teacher prompt templates (explainer + summarizer) + seed.py # Seed data generation (DB + template + synthetic) + distill.py # Stage 1: Generate ChatML pairs via teacher model + prepare.py # Stage 2: Filter, dedup, split + finetune.py # Stage 3: MLX QLoRA fine-tuning + eval.py # Stage 4: Metrics + ship criteria + export.py # Stage 5: Ollama modelfile + registration + +schools/ + bishop-state/ + config.yaml # Full institutional config + seed_queries.yaml # Example queries for training pair generation + +tests/ + conftest.py # Pytest fixtures + training/ + __init__.py + test_config.py # Config loader tests + test_prompts.py # Prompt template tests + test_seed.py # Seed generation tests + test_prepare.py # Filter/dedup/split tests + test_eval.py # Eval metrics + ship criteria tests + +codebenders-dashboard/ + lib/ + model-client.ts # New: Ollama/OpenAI adapter +``` + +### Modified Files + +``` +codebenders-dashboard/ + app/api/courses/explain-pairing/route.ts # Swap OpenAI → model-client + app/api/query-summary/route.ts # Swap OpenAI → model-client + +requirements.txt # Add training dependencies +.gitignore # Add training_data/ +``` + +--- + +## Task 1: Project Scaffolding + +**Files:** +- Create: `training/__init__.py` +- Create: `tests/conftest.py` +- Create: `tests/training/__init__.py` +- Create: `pytest.ini` +- Modify: `requirements.txt` +- Modify: `.gitignore` + +- [ ] **Step 1: Create training package directory** + +```bash +mkdir -p training tests/training +``` + +- [ ] **Step 2: Create package init files** + +Create `training/__init__.py`: +```python +"""Config-driven distillation pipeline for per-school fine-tuned models.""" +``` + +Create `tests/__init__.py`: +```python +``` + +Create `tests/training/__init__.py`: +```python +``` + +- [ ] **Step 3: Create pytest.ini** + +```ini +[pytest] +testpaths = tests 
+python_files = test_*.py +python_classes = Test* +python_functions = test_* +``` + +- [ ] **Step 4: Add training dependencies to requirements.txt** + +Append to `requirements.txt`: +``` +# Training pipeline +pyyaml>=6.0 +anthropic>=0.40.0 +ollama>=0.4.0 +rouge-score>=0.1.2 +mlx>=0.22.0 +mlx-lm>=0.20.0 +``` + +- [ ] **Step 5: Add training_data to .gitignore** + +Append to `.gitignore`: +``` +# Training pipeline artifacts +training_data/ +``` + +- [ ] **Step 6: Create conftest.py with shared fixtures** + +Create `tests/conftest.py`: +```python +"""Shared pytest fixtures for the training pipeline.""" + +from pathlib import Path + +import pytest +import yaml + + +FIXTURES_DIR = Path(__file__).parent / "fixtures" + + +@pytest.fixture +def sample_school_config(): + """Minimal valid school config for testing.""" + return { + "school": { + "name": "Test Community College", + "code": "tcc", + "type": "community_college", + "designation": [], + "location": { + "city": "Test City", + "state": "Alabama", + "setting": "urban", + }, + "enrollment": { + "total_headcount": 1000, + "percent_full_time": 0.50, + "percent_part_time": 0.50, + }, + "demographics": { + "percent_pell_eligible": 0.60, + "percent_first_gen": 0.45, + }, + }, + "database": { + "main_table": "student_level_with_predictions", + "course_table": "course_enrollments", + "connection_env": "DATABASE_URL", + }, + "schema": { + "student_columns": { + "Cohort": "Cohort year", + "Race": "Student race/ethnicity", + "Retention": "Retention indicator (0 or 1)", + }, + "course_columns": { + "course_prefix": "Course dept code", + "grade": "Student grade", + }, + }, + "domain": { + "programs": [ + { + "name": "Nursing", + "cip": "51.3801", + "gateway_courses": ["BIO 201"], + } + ], + "key_metrics": ["retention_rate", "dfwi_rate"], + "terminology": { + "credential": "associate degree", + "at_risk": "at-risk students", + }, + }, + "distillation": { + "teacher_model": "claude-sonnet-4-20250514", + "teacher_backend": 
"anthropic", + "local_teacher_model": "qwen3.5:27b", + "local_teacher_backend": "ollama", + "pairs_per_task": 10, + }, + "training": { + "default_model": "qwen3.5:9b", + "fallback_model": "qwen3.5:4b", + "method": "qlora", + "quantization": 4, + "lora_rank": 16, + "lora_alpha": 32, + "epochs": 3, + "learning_rate": 1e-4, + "batch_size": 4, + "warmup_steps": 100, + "eval_every": 50, + "early_stopping_patience": 3, + }, + } + + +@pytest.fixture +def sample_course_pairing_data(): + """Sample course pairing input for explainer adapter.""" + return { + "course_a": {"prefix": "MAT", "number": "100", "name": "Intermediate Algebra"}, + "course_b": {"prefix": "BIO", "number": "201", "name": "Anatomy & Physiology I"}, + "stats": { + "course_a_dfwi": 0.42, + "course_b_dfwi": 0.31, + "co_enrollment_count": 85, + "co_enrollment_dfwi": 0.38, + "delivery_breakdown": [ + {"method": "Face-to-Face", "count": 50, "dfwi_rate": 0.34}, + {"method": "Online", "count": 35, "dfwi_rate": 0.44}, + ], + }, + } + + +@pytest.fixture +def sample_query_result_data(): + """Sample query result input for summarizer adapter.""" + return { + "prompt": "retention rate by race for 2023 cohort", + "data": [ + {"Race": "Black", "retention_rate": 0.41}, + {"Race": "White", "retention_rate": 0.52}, + {"Race": "Hispanic", "retention_rate": 0.47}, + ], + "rowCount": 3, + "vizType": "bar", + } + + +@pytest.fixture +def sample_explainer_output(): + """Valid explainer adapter JSON output.""" + return { + "explanation": "MAT 100 and BIO 201 show a high co-enrollment DFWI rate of 38%.", + "structural_factors": [ + "Math placement gaps from feeder high schools", + "Online sections show higher DFW rates", + ], + "student_impact": "Students taking both courses simultaneously face compounded difficulty.", + "advisor_recommendation": "Consider staggering MAT 100 and BIO 201 across terms for at-risk students.", + "data_limitations": ["Co-enrollment data limited to 2020+ cohorts"], + "related_intervention": "Math 
Bootcamp", + } + + +@pytest.fixture +def sample_summarizer_output(): + """Valid summarizer adapter JSON output.""" + return { + "summary": "Retention rates vary significantly by race in the 2023 cohort.", + "key_insights": [ + "Black students have the lowest retention rate at 41%", + "11-point gap between Black and White student retention", + ], + "context": "This aligns with the institution's strategic goal to close equity gaps.", + "action_items": [ + "Review early alert referrals for Black male students in Fall cohort", + ], + "caveats": ["Race is self-reported; 6% of records are Unknown"], + } +``` + +- [ ] **Step 7: Verify pytest runs with no errors** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/ -v --co` +Expected: "no tests ran" (collected 0 items) with exit code 0 + +- [ ] **Step 8: Commit** + +```bash +git add training/ tests/ pytest.ini requirements.txt .gitignore +git commit -m "chore: scaffold training pipeline package and test infrastructure" +``` + +--- + +## Task 2: Config Loader + +**Files:** +- Create: `training/config.py` +- Create: `tests/training/test_config.py` + +- [ ] **Step 1: Write the failing tests** + +Create `tests/training/test_config.py`: +```python +"""Tests for training.config — constants and school config loader.""" + +import pytest +import yaml +from pathlib import Path +from unittest.mock import patch + +from training.config import ( + BASE_DIR, + SCHOOLS_DIR, + TRAIN_RATIO, + VAL_RATIO, + TEST_RATIO, + load_school_config, + get_school_dir, + get_training_data_dir, + write_jsonl, +) + + +class TestConstants: + def test_split_ratios_sum_to_one(self): + assert TRAIN_RATIO + VAL_RATIO + TEST_RATIO == pytest.approx(1.0) + + def test_base_dir_is_path(self): + assert isinstance(BASE_DIR, Path) + + def test_schools_dir_is_path(self): + assert isinstance(SCHOOLS_DIR, Path) + + +class TestLoadSchoolConfig: + def test_loads_valid_config(self, tmp_path, sample_school_config): + 
school_dir = tmp_path / "test-school" + school_dir.mkdir() + config_path = school_dir / "config.yaml" + config_path.write_text(yaml.dump(sample_school_config)) + + with patch("training.config.SCHOOLS_DIR", tmp_path): + config = load_school_config("test-school") + + assert config["school"]["name"] == "Test Community College" + assert config["school"]["code"] == "tcc" + assert config["database"]["main_table"] == "student_level_with_predictions" + + def test_raises_on_missing_school(self, tmp_path): + with patch("training.config.SCHOOLS_DIR", tmp_path): + with pytest.raises(FileNotFoundError, match="School config not found"): + load_school_config("nonexistent") + + def test_raises_on_missing_required_keys(self, tmp_path): + school_dir = tmp_path / "bad-school" + school_dir.mkdir() + config_path = school_dir / "config.yaml" + config_path.write_text(yaml.dump({"school": {"name": "Bad"}})) + + with patch("training.config.SCHOOLS_DIR", tmp_path): + with pytest.raises(ValueError, match="Missing required"): + load_school_config("bad-school") + + +class TestGetSchoolDir: + def test_returns_path(self, tmp_path): + with patch("training.config.SCHOOLS_DIR", tmp_path): + result = get_school_dir("bishop-state") + assert result == tmp_path / "bishop-state" + + +class TestGetTrainingDataDir: + def test_returns_path_with_school(self): + result = get_training_data_dir("bishop-state") + assert "bishop-state" in str(result) + assert result.name == "bishop-state" + + +class TestWriteJsonl: + def test_writes_items(self, tmp_path): + import json + + items = [{"a": 1}, {"b": 2}] + outfile = tmp_path / "test.jsonl" + count = write_jsonl(items, outfile) + + assert count == 2 + lines = outfile.read_text().strip().split("\n") + assert json.loads(lines[0]) == {"a": 1} + assert json.loads(lines[1]) == {"b": 2} + + def test_writes_with_transform(self, tmp_path): + import json + + items = [1, 2, 3] + outfile = tmp_path / "test.jsonl" + count = write_jsonl(items, outfile, transform=lambda x: 
{"val": x * 2}) + + assert count == 3 + lines = outfile.read_text().strip().split("\n") + assert json.loads(lines[0]) == {"val": 2} + + def test_skips_none_from_transform(self, tmp_path): + items = [1, 2, 3] + outfile = tmp_path / "test.jsonl" + count = write_jsonl(items, outfile, transform=lambda x: None if x == 2 else {"v": x}) + + assert count == 2 + + def test_creates_parent_dirs(self, tmp_path): + outfile = tmp_path / "sub" / "dir" / "test.jsonl" + count = write_jsonl([{"x": 1}], outfile) + assert count == 1 + assert outfile.exists() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_config.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'training.config'` + +- [ ] **Step 3: Write the implementation** + +Create `training/config.py`: +```python +"""Shared constants and school config loader for the training pipeline.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Callable, Optional + +import yaml + +# --------------------------------------------------------------------------- +# Directory layout +# --------------------------------------------------------------------------- + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +SCHOOLS_DIR = PROJECT_ROOT / "schools" +BASE_DIR = PROJECT_ROOT / "training_data" + +# --------------------------------------------------------------------------- +# Dataset split ratios +# --------------------------------------------------------------------------- + +TRAIN_RATIO = 0.80 +VAL_RATIO = 0.10 +TEST_RATIO = 0.10 + +# --------------------------------------------------------------------------- +# Deduplication +# --------------------------------------------------------------------------- + +JACCARD_THRESHOLD = 1.0 # Exact duplicates only + +# --------------------------------------------------------------------------- +# Required 
top-level keys in school config +# --------------------------------------------------------------------------- + +_REQUIRED_KEYS = {"school", "database", "schema", "domain", "distillation", "training"} + + +# --------------------------------------------------------------------------- +# Config loader +# --------------------------------------------------------------------------- + + +def load_school_config(school: str) -> dict[str, Any]: + """Load and validate a school's config.yaml. + + Args: + school: School directory name (e.g. "bishop-state"). + + Returns: + Parsed config dict. + + Raises: + FileNotFoundError: If the school directory or config.yaml doesn't exist. + ValueError: If required top-level keys are missing. + """ + config_path = SCHOOLS_DIR / school / "config.yaml" + if not config_path.exists(): + raise FileNotFoundError( + f"School config not found: {config_path}" + ) + + with config_path.open("r", encoding="utf-8") as fh: + config = yaml.safe_load(fh) + + missing = _REQUIRED_KEYS - set(config.keys()) + if missing: + raise ValueError( + f"Missing required top-level keys in {config_path}: {missing}" + ) + + return config + + +def get_school_dir(school: str) -> Path: + """Return the path to a school's config directory.""" + return SCHOOLS_DIR / school + + +def get_training_data_dir(school: str) -> Path: + """Return the path to a school's training data directory.""" + return BASE_DIR / school + + +# --------------------------------------------------------------------------- +# JSONL writer (adapted from d4bl) +# --------------------------------------------------------------------------- + + +def write_jsonl( + items: list, + outfile: Path, + transform: Optional[Callable] = None, +) -> int: + """Write items to a JSONL file. + + Args: + items: List of JSON-serializable objects. + outfile: Destination file path. + transform: Optional per-item transformation; returning None skips. + + Returns: + Number of lines written. 
+ """ + outfile = Path(outfile) + outfile.parent.mkdir(parents=True, exist_ok=True) + count = 0 + with outfile.open("w", encoding="utf-8") as fh: + for item in items: + if transform is not None: + item = transform(item) + if item is None: + continue + fh.write(json.dumps(item, ensure_ascii=False) + "\n") + count += 1 + return count +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_config.py -v` +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add training/config.py tests/training/test_config.py +git commit -m "feat(training): config loader with YAML validation and JSONL writer" +``` + +--- + +## Task 3: Bishop State School Config + +**Files:** +- Create: `schools/bishop-state/config.yaml` +- Create: `schools/bishop-state/seed_queries.yaml` + +- [ ] **Step 1: Create the school directory** + +```bash +mkdir -p schools/bishop-state +``` + +- [ ] **Step 2: Write config.yaml** + +Create `schools/bishop-state/config.yaml` with the full institutional config from the design spec. This is a data file — the schema was validated in Task 2's tests. Include all sections: school identity, location, enrollment, demographics, database schema (copying exact columns from `route.ts` SCHEMA_INFO), domain knowledge, workforce, peers, financial, completion, instruction, pipeline, technology, access, equity, interventions, student_life, health, patterns, trends, priorities, data_caveats, distillation, and training config. 
+ +```yaml +# Bishop State Community College — Training Pipeline Config +# See docs/superpowers/specs/2026-03-27-distillation-pipeline-design.md + +school: + name: "Bishop State Community College" + code: "bscc" + type: "community_college" + designation: ["hbcu", "minority_serving"] + accreditation: "SACSCOC" + founded: 1927 + + location: + address: "351 North Broad Street" + city: "Mobile" + state: "Alabama" + zip: "36603" + county: "Mobile County" + region: "Gulf Coast" + setting: "urban" + climate_zone: "subtropical" + + enrollment: + total_headcount: 4200 + fte: 2800 + undergraduate_only: true + residential: false + percent_full_time: 0.42 + percent_part_time: 0.58 + percent_online: 0.35 + open_admission: true + + demographics: + percent_black: 0.72 + percent_white: 0.18 + percent_hispanic: 0.05 + percent_other: 0.05 + percent_pell_eligible: 0.68 + percent_first_gen: 0.55 + percent_adult_learners: 0.40 + median_household_income_area: 42000 + + workforce: + top_employers: ["Austal USA", "Mobile Infirmary", "AM/NS Calvert"] + high_demand_fields: ["healthcare", "advanced_manufacturing", "maritime"] + workforce_board: "Mobile Works" + + academics: + calendar: "semester" + degree_types: ["associate", "certificate", "short_certificate"] + total_programs: 45 + largest_programs: ["Nursing", "Welding", "Business Administration"] + transfer_partners: ["University of South Alabama", "Alabama A&M"] + dual_enrollment: true + + student_support: + tutoring: true + food_pantry: true + childcare: false + transportation_assistance: true + mental_health_services: true + early_alert_system: true + + challenges: + - "High percentage of students working 20+ hours/week" + - "Limited public transit access to satellite campuses" + - "Hurricane season disrupts Fall semester attendance" + - "Many students require developmental education in math" + + strengths: + - "Strong employer partnerships in healthcare and maritime" + - "Active student mentoring program" + - "High nursing program 
pass rates on NCLEX" + + peers: + ipeds_id: "101505" + carnegie_class: "Associate's—High Transfer-High Traditional" + peer_institutions: ["Lawson State CC", "Shelton State CC", "Trenholm State CC"] + state_system: "Alabama Community College System" + governing_board: "ACCS Board of Trustees" + + financial: + in_district_tuition: 4800 + in_state_tuition: 4800 + avg_financial_aid_package: 5200 + percent_receiving_aid: 0.82 + percent_student_loans: 0.25 + cost_of_living_index: 87.3 + textbook_program: "inclusive_access" + tuition_payment_plan: true + emergency_aid_fund: true + + completion: + ipeds_graduation_rate: 0.18 + adjusted_completion_rate: 0.42 + avg_time_to_credential: 3.2 + percent_transfer_out: 0.24 + percent_stop_out_return: 0.15 + top_completion_barriers: + - "developmental_math_sequences" + - "financial_emergencies" + - "work_schedule_conflicts" + + instruction: + student_faculty_ratio: 18 + percent_full_time_faculty: 0.45 + percent_adjunct: 0.55 + avg_class_size: 22 + developmental_ed_model: "corequisite" + lms: "Canvas" + + pipeline: + feeder_high_schools: + - name: "Williamson High School" + percent_of_enrollment: 0.12 + avg_readiness: "below_college_level" + - name: "Murphy High School" + percent_of_enrollment: 0.08 + avg_readiness: "mixed" + percent_ged: 0.11 + percent_dual_enrollment_origin: 0.09 + percent_veterans: 0.07 + percent_career_changers: 0.14 + percent_displaced_workers: 0.05 + percent_international: 0.02 + primary_recruitment_radius_miles: 35 + + technology: + percent_students_with_reliable_wifi: 0.71 + percent_students_with_personal_laptop: 0.64 + campus_device_lending: true + hotspot_lending: true + digital_literacy_required: false + broadband_desert_overlap: true + + access: + campus_count: 4 + campuses: + - name: "Main Campus" + address: "351 N Broad St" + public_transit_accessible: true + - name: "Southwest Campus" + address: "925 Dauphin Island Pkwy" + public_transit_accessible: false + percent_students_commute_30_plus_min: 0.35 + 
public_transit_quality: "limited" + parking_adequate: true + evening_weekend_classes: true + + equity: + known_gaps: + - metric: "gateway_math_pass_rate" + group_a: { name: "Black male students", value: 0.41 } + group_b: { name: "Overall", value: 0.58 } + initiative: "Male Student Success mentoring program" + - metric: "retention" + group_a: { name: "Part-time students", value: 0.38 } + group_b: { name: "Full-time students", value: 0.61 } + initiative: "15-to-Finish advising campaign" + dei_office: true + title_ix_coordinator: true + minority_male_initiative: "Brother 2 Brother" + + interventions: + active: + - name: "Starfish Early Alert" + type: "early_warning" + target: "all students" + trigger: "missed 2+ classes or below C at midterm" + effectiveness: "12% retention lift in pilot cohorts" + - name: "Math Bootcamp" + type: "academic_support" + target: "students placing into developmental math" + timing: "2 weeks before Fall semester" + effectiveness: "participants 2x more likely to pass MAT 100" + - name: "Emergency Micro-Grants" + type: "financial" + target: "students facing unexpected financial hardship" + max_award: 500 + effectiveness: "78% of recipients re-enrolled next term" + planned: + - name: "Proactive advising for 25+ credit students" + launch: "Fall 2026" + + student_life: + percent_working_while_enrolled: 0.72 + percent_working_over_20hrs: 0.48 + percent_single_parents: 0.18 + percent_caregiver_responsibilities: 0.25 + childcare_waitlist: true + student_orgs: 15 + athletics: false + housing_insecurity_rate: 0.14 + food_insecurity_rate: 0.31 + + health: + mental_health_counselor_ratio: "1:1400" + community_health_context: + - "Mobile County has highest diabetes rate in Alabama" + - "Limited mental health providers in service area" + substance_abuse_programs: true + crisis_intervention_protocol: true + + outcomes: + job_placement_rate_6mo: 0.78 + median_salary_after_credential: + associate: 34000 + certificate: 29000 + percent_employed_in_field: 0.65 
+ licensure_pass_rates: + nursing_nclex: 0.89 + welding_aws: 0.92 + emt: 0.85 + transfer_success_rate: 0.71 + employer_satisfaction_rate: 0.88 + + patterns: + high_attrition_points: + - week: 4 + reason: "Financial aid disbursement delays" + - week: 8 + reason: "Midterm performance shock" + - month: "October" + reason: "Hurricane season peak" + registration_peaks: ["April", "July", "November"] + summer_melt_rate: 0.22 + + trends: + enrollment_direction: "declining" + enrollment_5yr_change: -0.12 + completion_direction: "improving" + notable_changes: + - year: 2020 + event: "COVID shift to online — permanent hybrid expansion" + - year: 2022 + event: "Switched to corequisite math model — dev-ed pass rates doubled" + - year: 2023 + event: "Launched early alert system with ML predictions" + + priorities: + strategic_plan_years: "2024-2029" + top_goals: + - "Increase fall-to-fall retention from 42% to 55%" + - "Launch 3 new short-term workforce certificates" + - "Close equity gap in gateway math by 50%" + accreditation_qep_topic: "Guided Pathways implementation" + grant_funded_initiatives: + - name: "Title III Strengthening Institutions" + focus: "Student support services and advising redesign" + end_date: "2027-09-30" + - name: "NSF ATE Grant" + focus: "Advanced manufacturing curriculum" + end_date: "2026-05-31" + + data_caveats: + - "Pre-2020 cohorts lack online/hybrid delivery classification" + - "Race/ethnicity is self-reported; 6% of records are 'Unknown'" + - "GPA data for dual-enrollment students may reflect high school scale" + - "Transfer-out data relies on National Student Clearinghouse match — ~85% match rate" + - "Course enrollment records before 2019 do not include instructor_status" + +database: + main_table: "student_level_with_predictions" + course_table: "course_enrollments" + connection_env: "DATABASE_URL" + +schema: + student_columns: + Cohort: "Cohort year (numeric: 2019, 2020, etc.)" + Cohort_Term: "Term of cohort entry (Fall, Spring, Summer)" + 
Student_GUID: "Unique student identifier" + Institution_ID: "Institution identifier (102030 for Bishop State)" + Gender: "Student gender" + Race: "Student race/ethnicity" + Student_Age: "Age of student (integer)" + First_Gen: "First generation status" + Enrollment_Type: "Type of enrollment" + Enrollment_Intensity_First_Term: "Enrollment intensity (Full-Time, Part-Time)" + Program_of_Study_Year_1: "Program of study in year 1 (CIP code)" + Credential_Type_Sought_Year_1: "Credential type being pursued" + Math_Placement: "Math placement level (C=college-level, R=remedial, N=none)" + Retention: "Retention indicator (0 or 1)" + Persistence: "Persistence indicator (0 or 1)" + GPA_Group_Year_1: "GPA in year 1" + GPA_Group_Term_1: "GPA in term 1" + Number_of_Credits_Attempted_Year_1: "Credits attempted in year 1" + Number_of_Credits_Earned_Year_1: "Credits earned in year 1" + Number_of_Credits_Attempted_Year_2: "Credits attempted in year 2" + Number_of_Credits_Earned_Year_2: "Credits earned in year 2" + Time_to_Credential: "Time to any credential" + retention_probability: "Predicted probability of retention (0-1)" + retention_risk_category: "Risk category (Low/Moderate/High/Critical Risk)" + at_risk_alert: "Early warning alert level (LOW/MODERATE/HIGH/URGENT)" + course_completion_rate: "Course completion rate (0-1)" + passing_rate: "Course passing rate (0-1)" + course_columns: + course_prefix: "Course dept code (MAT, ENG, NUR, CIS, etc.)" + course_number: "Course number (100, 201, etc.)" + course_name: "Full course name" + grade: "Student grade (A, B, C, D, F, W, I, AU, P)" + delivery_method: "Delivery (F=face-to-face, O=online, H=hybrid)" + instructor_status: "Instructor type (FT=full-time, PT=part-time)" + gateway_type: "Gateway (M=math, E=English, N=not a gateway)" + credits_attempted: "Credits attempted (numeric)" + credits_earned: "Credits earned (numeric)" + cohort: "Cohort year as text" + academic_year: "Academic year (e.g. 
2021-22)" + academic_term: "Term (FALL, SPRING, SUMMER)" + ferpa_excluded: + - "Student_GUID" + - "student_guid" + +domain: + programs: + - name: "Nursing (ADN)" + cip: "51.3801" + gateway_courses: ["BIO 201", "MAT 110"] + - name: "Welding Technology" + cip: "48.0508" + gateway_courses: ["WDT 108", "WDT 109"] + - name: "Business Administration" + cip: "52.0201" + gateway_courses: ["MAT 100", "BUS 241"] + - name: "Computer Information Systems" + cip: "11.0101" + gateway_courses: ["CIS 146", "MAT 100"] + - name: "Emergency Medical Technician" + cip: "51.0904" + gateway_courses: ["EMS 100", "BIO 201"] + key_metrics: + - "retention_rate" + - "dfwi_rate" + - "gateway_pass_rate" + - "completion_rate" + - "transfer_rate" + terminology: + credential: "associate degree or certificate" + at_risk: "students flagged by early warning system" + gateway_course: "first college-level course in math or English" + dfwi: "grades of D, F, W, or I (unsuccessful completion)" + +distillation: + teacher_model: "claude-sonnet-4-20250514" + teacher_backend: "anthropic" + local_teacher_model: "qwen3.5:27b" + local_teacher_backend: "ollama" + pairs_per_task: 1500 + +training: + default_model: "qwen3.5:9b" + fallback_model: "qwen3.5:4b" + method: "qlora" + quantization: 4 + lora_rank: 16 + lora_alpha: 32 + epochs: 3 + learning_rate: 1.0e-4 + batch_size: 4 + warmup_steps: 100 + eval_every: 50 + early_stopping_patience: 3 +``` + +- [ ] **Step 3: Write seed_queries.yaml** + +Create `schools/bishop-state/seed_queries.yaml`: +```yaml +# Example queries for training pair generation +# These seed the template-driven portion of distillation. 
+ +explainer: + # Advisor-perspective queries + - query: "MAT 100 and BIO 201 pairing for nursing students" + style: "advisor" + - query: "ENG 101 and HIS 201 co-enrollment outcomes" + style: "advisor" + - query: "High DFW in MAT 110 for part-time evening students" + style: "advisor" + - query: "CIS 146 and MAT 100 pairing for CIS majors" + style: "advisor" + - query: "WDT 108 and WDT 109 sequential outcomes" + style: "advisor" + + # Administrator-perspective queries + - query: "Online vs face-to-face outcomes in gateway math" + style: "administrator" + - query: "Adjunct vs full-time instructor DFW rates in BIO 201" + style: "administrator" + - query: "Summer vs Fall section outcomes for ENG 101" + style: "administrator" + - query: "Developmental math co-enrollment with science courses" + style: "administrator" + - query: "Dual-enrollment student performance in college-level courses" + style: "administrator" + + # Faculty-perspective queries + - query: "EMS 100 and BIO 201 prerequisite outcomes" + style: "faculty" + - query: "MAT 100 withdrawal patterns by week of semester" + style: "faculty" + - query: "Hybrid delivery outcomes in nursing prerequisite courses" + style: "faculty" + +summarizer: + # Retention and completion + - query: "retention rate by race for 2023 cohort" + style: "faculty" + - query: "overall retention trend from 2019 to 2023" + style: "administrator" + - query: "retention rate for first-generation students" + style: "advisor" + - query: "completion rate by enrollment intensity" + style: "administrator" + + # Course performance + - query: "gateway course pass rates by delivery method" + style: "administrator" + - query: "top 10 courses with highest DFW rates" + style: "faculty" + - query: "DFW rates by instructor status in math courses" + style: "administrator" + - query: "course completion rates for online vs face-to-face" + style: "faculty" + + # Demographics and equity + - query: "enrollment by race and gender" + style: "administrator" + - 
query: "GPA distribution for Pell-eligible students" + style: "advisor" + - query: "retention gap between full-time and part-time students" + style: "administrator" + - query: "at-risk student count by program" + style: "advisor" + + # Risk and intervention + - query: "students with URGENT early warning alert by cohort" + style: "advisor" + - query: "average retention probability by math placement" + style: "faculty" + - query: "critical risk students in nursing program" + style: "advisor" +``` + +- [ ] **Step 4: Verify config loads correctly** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -c "from training.config import load_school_config; c = load_school_config('bishop-state'); print(f'Loaded: {c[\"school\"][\"name\"]}')"` +Expected: `Loaded: Bishop State Community College` + +- [ ] **Step 5: Commit** + +```bash +git add schools/ +git commit -m "feat(training): add Bishop State school config and seed queries" +``` + +--- + +## Task 4: Teacher Prompt Templates + +**Files:** +- Create: `training/prompts.py` +- Create: `tests/training/test_prompts.py` + +- [ ] **Step 1: Write the failing tests** + +Create `tests/training/test_prompts.py`: +```python +"""Tests for training.prompts — teacher prompt templates.""" + +import json +import pytest + +from training.prompts import ( + build_system_prompt, + build_explainer_prompt, + build_summarizer_prompt, + EXPLAINER_STUDENT_SYSTEM, + SUMMARIZER_STUDENT_SYSTEM, + EXPLAINER_SCHEMA, + SUMMARIZER_SCHEMA, +) + + +class TestBuildSystemPrompt: + def test_includes_school_name(self, sample_school_config): + result = build_system_prompt(sample_school_config) + assert "Test Community College" in result + + def test_includes_location(self, sample_school_config): + result = build_system_prompt(sample_school_config) + assert "Test City" in result + assert "Alabama" in result + + def test_includes_demographics(self, sample_school_config): + result = build_system_prompt(sample_school_config) + 
assert "Pell" in result or "pell" in result + + def test_returns_string(self, sample_school_config): + result = build_system_prompt(sample_school_config) + assert isinstance(result, str) + assert len(result) > 100 + + +class TestBuildExplainerPrompt: + def test_includes_course_data(self, sample_school_config, sample_course_pairing_data): + result = build_explainer_prompt(sample_school_config, sample_course_pairing_data) + assert "MAT" in result + assert "BIO" in result + + def test_includes_stats(self, sample_school_config, sample_course_pairing_data): + result = build_explainer_prompt(sample_school_config, sample_course_pairing_data) + assert "0.42" in result or "42" in result + + def test_includes_output_schema(self, sample_school_config, sample_course_pairing_data): + result = build_explainer_prompt(sample_school_config, sample_course_pairing_data) + assert "explanation" in result + assert "structural_factors" in result + assert "advisor_recommendation" in result + + def test_returns_string(self, sample_school_config, sample_course_pairing_data): + result = build_explainer_prompt(sample_school_config, sample_course_pairing_data) + assert isinstance(result, str) + + +class TestBuildSummarizerPrompt: + def test_includes_query(self, sample_school_config, sample_query_result_data): + result = build_summarizer_prompt(sample_school_config, sample_query_result_data) + assert "retention rate by race" in result + + def test_includes_data(self, sample_school_config, sample_query_result_data): + result = build_summarizer_prompt(sample_school_config, sample_query_result_data) + assert "Black" in result + assert "0.41" in result or "41" in result + + def test_includes_output_schema(self, sample_school_config, sample_query_result_data): + result = build_summarizer_prompt(sample_school_config, sample_query_result_data) + assert "summary" in result + assert "key_insights" in result + assert "action_items" in result + + def test_returns_string(self, sample_school_config, 
sample_query_result_data): + result = build_summarizer_prompt(sample_school_config, sample_query_result_data) + assert isinstance(result, str) + + +class TestStudentPrompts: + def test_explainer_student_system_is_concise(self): + assert len(EXPLAINER_STUDENT_SYSTEM) < 500 + assert "JSON" in EXPLAINER_STUDENT_SYSTEM + + def test_summarizer_student_system_is_concise(self): + assert len(SUMMARIZER_STUDENT_SYSTEM) < 500 + assert "JSON" in SUMMARIZER_STUDENT_SYSTEM + + +class TestOutputSchemas: + def test_explainer_schema_has_required_keys(self): + required = {"explanation", "structural_factors", "student_impact", + "advisor_recommendation", "data_limitations", "related_intervention"} + assert required == set(EXPLAINER_SCHEMA.keys()) + + def test_summarizer_schema_has_required_keys(self): + required = {"summary", "key_insights", "context", "action_items", "caveats"} + assert required == set(SUMMARIZER_SCHEMA.keys()) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_prompts.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'training.prompts'` + +- [ ] **Step 3: Write the implementation** + +Create `training/prompts.py`: +```python +"""Teacher prompt templates for the distillation pipeline. + +Provides school-agnostic prompt builders that inject per-school context +from config.yaml to generate high-quality training pairs. 
+""" + +from __future__ import annotations + +import json +from typing import Any + +# --------------------------------------------------------------------------- +# Output schemas — define what the fine-tuned model produces +# --------------------------------------------------------------------------- + +EXPLAINER_SCHEMA = { + "explanation": "2-3 sentence plain-language explanation of the course pairing pattern", + "structural_factors": ["list of institutional or systemic factors driving this pattern"], + "student_impact": "what this means for students taking these courses", + "advisor_recommendation": "one actionable next step for advisors", + "data_limitations": ["caveats about interpreting this data"], + "related_intervention": "existing program that addresses this, or null", +} + +SUMMARIZER_SCHEMA = { + "summary": "2-3 sentence headline finding from the query results", + "key_insights": ["list of notable patterns in the data"], + "context": "how this connects to institutional priorities or known challenges", + "action_items": ["what someone should do with this information"], + "caveats": ["data limitations relevant to this specific query"], +} + +# --------------------------------------------------------------------------- +# Student system prompts (what the fine-tuned model sees at inference) +# --------------------------------------------------------------------------- + +EXPLAINER_STUDENT_SYSTEM = ( + "You are a student success analyst. Given course pairing data, generate a " + "structured JSON explanation. Include: explanation, structural_factors, " + "student_impact, advisor_recommendation, data_limitations, and " + "related_intervention. Respond with ONLY valid JSON." +) + +SUMMARIZER_STUDENT_SYSTEM = ( + "You are a student success analyst. Given a query and its results, generate " + "a structured JSON summary. Include: summary, key_insights, context, " + "action_items, and caveats. Respond with ONLY valid JSON." 
+) + +# --------------------------------------------------------------------------- +# Context builder — extracts relevant sections from school config +# --------------------------------------------------------------------------- + + +def build_system_prompt(config: dict[str, Any]) -> str: + """Build the teacher system prompt with full institutional context. + + Injects school identity, demographics, challenges, interventions, + equity gaps, and priorities from the school config. + + Args: + config: Parsed school config dict. + + Returns: + System prompt string for the teacher model. + """ + school = config["school"] + domain = config["domain"] + + sections = [] + + # Identity + name = school["name"] + location = school.get("location", {}) + city = location.get("city", "") + state = location.get("state", "") + school_type = school.get("type", "institution") + sections.append( + f"You are a student success analyst at {name}, " + f"a {school_type} in {city}, {state}." + ) + + # Designation + designations = school.get("designation", []) + if designations: + sections.append(f"Institutional designations: {', '.join(designations)}.") + + # Enrollment + enrollment = school.get("enrollment", {}) + if enrollment: + parts = [] + if "total_headcount" in enrollment: + parts.append(f"{enrollment['total_headcount']:,} students") + if "percent_part_time" in enrollment: + parts.append(f"{enrollment['percent_part_time']:.0%} part-time") + if "percent_online" in enrollment: + parts.append(f"{enrollment['percent_online']:.0%} online") + if enrollment.get("open_admission"): + parts.append("open admission") + if parts: + sections.append(f"Enrollment profile: {', '.join(parts)}.") + + # Demographics + demographics = school.get("demographics", {}) + if demographics: + parts = [] + for key, label in [ + ("percent_pell_eligible", "Pell-eligible"), + ("percent_first_gen", "first-generation"), + ("percent_adult_learners", "adult learners (25+)"), + ]: + if key in demographics: + 
parts.append(f"{demographics[key]:.0%} {label}") + if parts: + sections.append(f"Student demographics: {', '.join(parts)}.") + + # Programs + programs = domain.get("programs", []) + if programs: + program_names = [p["name"] for p in programs[:5]] + sections.append(f"Key programs: {', '.join(program_names)}.") + + # Challenges + challenges = school.get("challenges", []) + if challenges: + sections.append("Known challenges:\n" + "\n".join(f"- {c}" for c in challenges)) + + # Strengths + strengths = school.get("strengths", []) + if strengths: + sections.append("Institutional strengths:\n" + "\n".join(f"- {s}" for s in strengths)) + + # Equity gaps + equity = school.get("equity", {}) + known_gaps = equity.get("known_gaps", []) + if known_gaps: + gap_lines = [] + for gap in known_gaps: + ga = gap.get("group_a", {}) + gb = gap.get("group_b", {}) + gap_lines.append( + f"- {gap['metric']}: {ga.get('name', '?')} ({ga.get('value', '?')}) " + f"vs {gb.get('name', '?')} ({gb.get('value', '?')})" + ) + sections.append("Known equity gaps:\n" + "\n".join(gap_lines)) + + # Interventions + interventions = school.get("interventions", {}) + active = interventions.get("active", []) + if active: + lines = [] + for i in active: + line = f"- {i['name']} ({i['type']}): {i.get('effectiveness', 'effectiveness unknown')}" + lines.append(line) + sections.append("Active interventions:\n" + "\n".join(lines)) + + # Priorities + priorities = school.get("priorities", {}) + top_goals = priorities.get("top_goals", []) + if top_goals: + sections.append("Strategic priorities:\n" + "\n".join(f"- {g}" for g in top_goals)) + + # Data caveats + caveats = school.get("data_caveats", []) + if caveats: + sections.append("Data caveats:\n" + "\n".join(f"- {c}" for c in caveats)) + + # Completion context + completion = school.get("completion", {}) + if completion: + parts = [] + if "ipeds_graduation_rate" in completion: + parts.append(f"IPEDS grad rate: {completion['ipeds_graduation_rate']:.0%}") + if 
"adjusted_completion_rate" in completion: + parts.append(f"adjusted completion: {completion['adjusted_completion_rate']:.0%}") + barriers = completion.get("top_completion_barriers", []) + if barriers: + parts.append(f"top barriers: {', '.join(b.replace('_', ' ') for b in barriers)}") + if parts: + sections.append(f"Completion context: {'; '.join(parts)}.") + + # Student life + student_life = school.get("student_life", {}) + if student_life: + parts = [] + if "percent_working_over_20hrs" in student_life: + parts.append(f"{student_life['percent_working_over_20hrs']:.0%} working 20+ hrs/wk") + if "food_insecurity_rate" in student_life: + parts.append(f"{student_life['food_insecurity_rate']:.0%} food insecure") + if "percent_single_parents" in student_life: + parts.append(f"{student_life['percent_single_parents']:.0%} single parents") + if parts: + sections.append(f"Student life: {', '.join(parts)}.") + + # Patterns + patterns = school.get("patterns", {}) + attrition_points = patterns.get("high_attrition_points", []) + if attrition_points: + lines = [] + for point in attrition_points: + when = f"week {point['week']}" if "week" in point else point.get("month", "?") + lines.append(f"- {when}: {point['reason']}") + sections.append("Known attrition patterns:\n" + "\n".join(lines)) + + # Workforce + workforce = school.get("workforce", {}) + if workforce: + employers = workforce.get("top_employers", []) + fields = workforce.get("high_demand_fields", []) + if employers or fields: + parts = [] + if employers: + parts.append(f"top employers: {', '.join(employers)}") + if fields: + parts.append(f"high-demand fields: {', '.join(fields)}") + sections.append(f"Workforce context: {'; '.join(parts)}.") + + # Outcomes + outcomes = school.get("outcomes", {}) + if outcomes: + parts = [] + if "job_placement_rate_6mo" in outcomes: + parts.append(f"6-month job placement: {outcomes['job_placement_rate_6mo']:.0%}") + licensure = outcomes.get("licensure_pass_rates", {}) + if licensure: + 
lic_parts = [f"{k}: {v:.0%}" for k, v in licensure.items()] + parts.append(f"licensure pass rates: {', '.join(lic_parts)}") + if parts: + sections.append(f"Outcomes: {'; '.join(parts)}.") + + sections.append("Respond with ONLY valid JSON.") + + return "\n\n".join(sections) + + +# --------------------------------------------------------------------------- +# Explainer prompt +# --------------------------------------------------------------------------- + + +def build_explainer_prompt( + config: dict[str, Any], + course_data: dict[str, Any], +) -> str: + """Build the teacher prompt for generating a course pairing explanation. + + Args: + config: Parsed school config dict. + course_data: Course pairing data dict with keys: course_a, course_b, stats. + + Returns: + User prompt string for the teacher model. + """ + schema_str = json.dumps(EXPLAINER_SCHEMA, indent=2) + data_str = json.dumps(course_data, indent=2, default=str) + + terminology = config.get("domain", {}).get("terminology", {}) + term_lines = "\n".join(f"- {k}: {v}" for k, v in terminology.items()) if terminology else "" + + return f"""Analyze the following course pairing data and explain the pattern. + +COURSE PAIRING DATA: +{data_str} + +{f"TERMINOLOGY:{chr(10)}{term_lines}{chr(10)}" if term_lines else ""} +Generate a JSON response with this exact schema: +{schema_str} + +Guidelines: +- Explain the pattern in plain language accessible to advisors and faculty. +- Connect structural factors to the institution's known challenges and context. +- Make the advisor recommendation specific and actionable. +- Reference existing interventions if relevant. +- Note any data limitations that affect interpretation. 
+- Do NOT speculate beyond what the data shows.""" + + +# --------------------------------------------------------------------------- +# Summarizer prompt +# --------------------------------------------------------------------------- + + +def build_summarizer_prompt( + config: dict[str, Any], + query_data: dict[str, Any], +) -> str: + """Build the teacher prompt for generating a query result summary. + + Args: + config: Parsed school config dict. + query_data: Dict with keys: prompt, data, rowCount, vizType. + + Returns: + User prompt string for the teacher model. + """ + schema_str = json.dumps(SUMMARIZER_SCHEMA, indent=2) + data_str = json.dumps(query_data["data"][:50], indent=2, default=str) + user_query = query_data["prompt"] + row_count = query_data.get("rowCount", len(query_data["data"])) + viz_type = query_data.get("vizType", "table") + + return f"""Summarize the following query results for a non-technical audience +(advisors, administrators, faculty). + +USER QUERY: {user_query} +VISUALIZATION TYPE: {viz_type} +TOTAL ROWS: {row_count} + +RESULTS: +{data_str} + +Generate a JSON response with this exact schema: +{schema_str} + +Guidelines: +- Lead with the most important finding. +- Connect insights to institutional context and priorities. +- Make action items specific to the roles that would see this data. +- Note data limitations relevant to this specific query. 
+- Do NOT hallucinate data points not present in the results.""" +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_prompts.py -v` +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add training/prompts.py tests/training/test_prompts.py +git commit -m "feat(training): teacher prompt templates for explainer and summarizer" +``` + +--- + +## Task 5: Seed Data Generation + +**Files:** +- Create: `training/seed.py` +- Create: `tests/training/test_seed.py` + +- [ ] **Step 1: Write the failing tests** + +Create `tests/training/test_seed.py`: +```python +"""Tests for training.seed — seed data generation.""" + +import pytest +import yaml +from pathlib import Path +from unittest.mock import patch + +from training.seed import ( + load_seed_queries, + generate_synthetic_course_pairings, + generate_synthetic_query_results, + format_as_chatml, +) + + +class TestLoadSeedQueries: + def test_loads_valid_yaml(self, tmp_path): + seed_file = tmp_path / "seed_queries.yaml" + seed_file.write_text(yaml.dump({ + "explainer": [ + {"query": "MAT 100 and BIO 201", "style": "advisor"}, + ], + "summarizer": [ + {"query": "retention by race", "style": "faculty"}, + ], + })) + + with patch("training.seed.get_school_dir", return_value=tmp_path): + result = load_seed_queries("test-school") + + assert len(result["explainer"]) == 1 + assert len(result["summarizer"]) == 1 + assert result["explainer"][0]["query"] == "MAT 100 and BIO 201" + + def test_returns_empty_on_missing_file(self, tmp_path): + with patch("training.seed.get_school_dir", return_value=tmp_path): + result = load_seed_queries("test-school") + assert result == {"explainer": [], "summarizer": []} + + +class TestGenerateSyntheticCoursePairings: + def test_generates_requested_count(self, sample_school_config): + results = generate_synthetic_course_pairings(sample_school_config, count=5) + assert 
len(results) == 5 + + def test_each_has_required_keys(self, sample_school_config): + results = generate_synthetic_course_pairings(sample_school_config, count=3) + for r in results: + assert "course_a" in r + assert "course_b" in r + assert "stats" in r + assert "prefix" in r["course_a"] + assert "number" in r["course_a"] + + def test_returns_empty_for_zero(self, sample_school_config): + results = generate_synthetic_course_pairings(sample_school_config, count=0) + assert results == [] + + +class TestGenerateSyntheticQueryResults: + def test_generates_requested_count(self, sample_school_config): + results = generate_synthetic_query_results(sample_school_config, count=5) + assert len(results) == 5 + + def test_each_has_required_keys(self, sample_school_config): + results = generate_synthetic_query_results(sample_school_config, count=3) + for r in results: + assert "prompt" in r + assert "data" in r + assert "rowCount" in r + assert "vizType" in r + + def test_returns_empty_for_zero(self, sample_school_config): + results = generate_synthetic_query_results(sample_school_config, count=0) + assert results == [] + + +class TestFormatAsChatML: + def test_format_structure(self): + result = format_as_chatml("system", "user", "assistant") + assert "messages" in result + assert len(result["messages"]) == 3 + assert result["messages"][0] == {"role": "system", "content": "system"} + assert result["messages"][1] == {"role": "user", "content": "user"} + assert result["messages"][2] == {"role": "assistant", "content": "assistant"} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_seed.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'training.seed'` + +- [ ] **Step 3: Write the implementation** + +Create `training/seed.py`: +```python +"""Seed data generation for the distillation pipeline. 
+ +Generates synthetic course pairing data and query results to serve as +inputs for the teacher model during distillation. Also loads template +seed queries from the school's seed_queries.yaml. +""" + +from __future__ import annotations + +import random +from typing import Any + +import yaml + +from training.config import get_school_dir + +# --------------------------------------------------------------------------- +# Common course data for synthetic generation +# --------------------------------------------------------------------------- + +_PREFIXES = ["MAT", "ENG", "BIO", "CIS", "WDT", "HIS", "PSY", "BUS", "NUR", "EMS"] +_NUMBERS = ["100", "101", "110", "201", "202", "210", "241", "246"] +_NAMES = { + "MAT 100": "Intermediate Algebra", + "MAT 110": "Finite Mathematics", + "MAT 201": "Calculus I", + "ENG 101": "English Composition I", + "ENG 102": "English Composition II", + "BIO 201": "Anatomy & Physiology I", + "BIO 202": "Anatomy & Physiology II", + "CIS 146": "Microcomputer Applications", + "CIS 201": "Introduction to Programming", + "WDT 108": "SMAW Fillet/OFC", + "WDT 109": "SMAW Fillet/PAC/CAC", + "HIS 201": "United States History I", + "PSY 200": "General Psychology", + "BUS 241": "Principles of Accounting I", + "NUR 102": "Fundamentals of Nursing", + "EMS 100": "EMT Basic", +} +_DELIVERY_METHODS = ["Face-to-Face", "Online", "Hybrid"] +_GRADES = ["A", "B", "C", "D", "F", "W", "I"] +_VIZ_TYPES = ["bar", "line", "pie", "kpi", "table"] + +_QUERY_TEMPLATES = [ + ("retention rate by {dim} for {year} cohort", "bar"), + ("overall {metric} trend from 2019 to 2023", "line"), + ("{metric} for first-generation students", "kpi"), + ("{metric} by enrollment intensity", "bar"), + ("top 10 courses with highest DFW rates", "table"), + ("{metric} by {dim}", "bar"), + ("students with {alert} early warning alert", "kpi"), + ("{metric} distribution by program", "bar"), + ("{metric} gap between full-time and part-time students", "bar"), + ("at-risk student count by {dim}", 
"pie"), +] + +_DIMS = ["race", "gender", "cohort", "program", "enrollment intensity", "math placement"] +_METRICS = ["retention rate", "completion rate", "GPA", "DFW rate", "pass rate"] +_ALERTS = ["URGENT", "HIGH", "MODERATE"] +_YEARS = ["2019", "2020", "2021", "2022", "2023"] +_RACES = ["Black", "White", "Hispanic", "Asian", "Two or More", "Unknown"] + + +# --------------------------------------------------------------------------- +# Seed query loader +# --------------------------------------------------------------------------- + + +def load_seed_queries(school: str) -> dict[str, list[dict]]: + """Load seed queries from a school's seed_queries.yaml. + + Args: + school: School directory name. + + Returns: + Dict with "explainer" and "summarizer" lists of query dicts. + """ + seed_path = get_school_dir(school) / "seed_queries.yaml" + if not seed_path.exists(): + return {"explainer": [], "summarizer": []} + + with seed_path.open("r", encoding="utf-8") as fh: + data = yaml.safe_load(fh) or {} + + return { + "explainer": data.get("explainer", []), + "summarizer": data.get("summarizer", []), + } + + +# --------------------------------------------------------------------------- +# Synthetic course pairing generation +# --------------------------------------------------------------------------- + + +def _random_course() -> dict[str, str]: + """Generate a random course identifier.""" + prefix = random.choice(_PREFIXES) + number = random.choice(_NUMBERS) + key = f"{prefix} {number}" + name = _NAMES.get(key, f"{prefix} {number} Course") + return {"prefix": prefix, "number": number, "name": name} + + +def _random_stats() -> dict[str, Any]: + """Generate random course pairing statistics.""" + dfwi_a = round(random.uniform(0.15, 0.55), 2) + dfwi_b = round(random.uniform(0.15, 0.55), 2) + co_count = random.randint(20, 200) + co_dfwi = round(random.uniform(min(dfwi_a, dfwi_b), max(dfwi_a, dfwi_b) + 0.1), 2) + co_dfwi = min(co_dfwi, 0.75) + + delivery_breakdown = [] + remaining 
= co_count + for method in _DELIVERY_METHODS: + if method == _DELIVERY_METHODS[-1]: + count = remaining + else: + count = random.randint(5, remaining - 5 * (len(_DELIVERY_METHODS) - len(delivery_breakdown) - 1)) + count = max(count, 1) + remaining -= count + delivery_breakdown.append({ + "method": method, + "count": count, + "dfwi_rate": round(random.uniform(0.15, 0.55), 2), + }) + + return { + "course_a_dfwi": dfwi_a, + "course_b_dfwi": dfwi_b, + "co_enrollment_count": co_count, + "co_enrollment_dfwi": co_dfwi, + "delivery_breakdown": delivery_breakdown, + } + + +def generate_synthetic_course_pairings( + config: dict[str, Any], + count: int, +) -> list[dict[str, Any]]: + """Generate synthetic course pairing data for explainer training. + + Args: + config: Parsed school config dict (used for program-aware generation). + count: Number of pairings to generate. + + Returns: + List of course pairing data dicts. + """ + if count == 0: + return [] + + results = [] + for _ in range(count): + course_a = _random_course() + course_b = _random_course() + while course_b["prefix"] == course_a["prefix"] and course_b["number"] == course_a["number"]: + course_b = _random_course() + results.append({ + "course_a": course_a, + "course_b": course_b, + "stats": _random_stats(), + }) + return results + + +# --------------------------------------------------------------------------- +# Synthetic query result generation +# --------------------------------------------------------------------------- + + +def generate_synthetic_query_results( + config: dict[str, Any], + count: int, +) -> list[dict[str, Any]]: + """Generate synthetic query results for summarizer training. + + Args: + config: Parsed school config dict. + count: Number of query results to generate. + + Returns: + List of query result dicts with prompt, data, rowCount, vizType. 
+ """ + if count == 0: + return [] + + results = [] + for i in range(count): + template, default_viz = _QUERY_TEMPLATES[i % len(_QUERY_TEMPLATES)] + prompt = template.format( + dim=random.choice(_DIMS), + metric=random.choice(_METRICS), + year=random.choice(_YEARS), + alert=random.choice(_ALERTS), + ) + + # Generate plausible result rows + num_rows = random.randint(2, 8) + data = [] + for _ in range(num_rows): + row = { + "Race": random.choice(_RACES), + "value": round(random.uniform(0.15, 0.85), 2), + "count": random.randint(10, 500), + } + data.append(row) + + results.append({ + "prompt": prompt, + "data": data, + "rowCount": num_rows, + "vizType": default_viz, + }) + + return results + + +# --------------------------------------------------------------------------- +# ChatML formatter +# --------------------------------------------------------------------------- + + +def format_as_chatml(system: str, user: str, assistant: str) -> dict: + """Format a (system, user, assistant) triple as a ChatML messages dict. + + Args: + system: The system prompt text. + user: The user message text. + assistant: The assistant response text. + + Returns: + A dict with a "messages" key containing a list of 3 role/content dicts. 
+ """ + return { + "messages": [ + {"role": "system", "content": system}, + {"role": "user", "content": user}, + {"role": "assistant", "content": assistant}, + ] + } +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_seed.py -v` +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add training/seed.py tests/training/test_seed.py +git commit -m "feat(training): seed data generation for explainer and summarizer" +``` + +--- + +## Task 6: Distillation Pipeline + +**Files:** +- Create: `training/distill.py` +- Create: `tests/training/test_distill.py` + +- [ ] **Step 1: Write the failing tests** + +Create `tests/training/test_distill.py`: +```python +"""Tests for training.distill — teacher model distillation.""" + +import json +import pytest +from unittest.mock import patch, MagicMock + +from training.distill import ( + validate_json, + call_teacher, + generate_explainer_pairs, + generate_summarizer_pairs, +) + + +class TestValidateJson: + def test_valid_json(self): + result = validate_json('{"key": "value"}') + assert result == {"key": "value"} + + def test_strips_markdown_fences(self): + result = validate_json('```json\n{"key": "value"}\n```') + assert result == {"key": "value"} + + def test_returns_none_for_invalid(self): + assert validate_json("not json") is None + + def test_returns_none_for_empty(self): + assert validate_json("") is None + assert validate_json(None) is None + + def test_returns_none_for_non_dict(self): + assert validate_json("[1, 2, 3]") is None + + +class TestCallTeacher: + def test_calls_anthropic_backend(self): + mock_client = MagicMock() + mock_message = MagicMock() + mock_message.content = [MagicMock(text='{"result": "ok"}')] + mock_message.usage.input_tokens = 100 + mock_message.usage.output_tokens = 50 + mock_client.messages.create.return_value = mock_message + + with 
patch("training.distill._get_anthropic_client", return_value=mock_client): + result = call_teacher( + system="system prompt", + user="user prompt", + backend="anthropic", + model="claude-sonnet-4-20250514", + ) + + assert result == '{"result": "ok"}' + mock_client.messages.create.assert_called_once() + + def test_calls_ollama_backend(self): + mock_response = {"message": {"content": '{"result": "ok"}'}} + + with patch("training.distill.ollama") as mock_ollama: + mock_ollama.chat.return_value = mock_response + result = call_teacher( + system="system prompt", + user="user prompt", + backend="ollama", + model="qwen3.5:27b", + ) + + assert result == '{"result": "ok"}' + mock_ollama.chat.assert_called_once() + + +class TestGenerateExplainerPairs: + def test_generates_pairs_from_seed_data(self, sample_school_config, sample_course_pairing_data): + mock_response = json.dumps({ + "explanation": "Test explanation", + "structural_factors": ["factor1"], + "student_impact": "impact", + "advisor_recommendation": "recommendation", + "data_limitations": ["caveat"], + "related_intervention": None, + }) + + with patch("training.distill.call_teacher", return_value=mock_response): + pairs = generate_explainer_pairs( + config=sample_school_config, + seed_data=[sample_course_pairing_data], + count=2, + ) + + assert len(pairs) == 2 + assert "messages" in pairs[0] + assert len(pairs[0]["messages"]) == 3 + + def test_skips_invalid_responses(self, sample_school_config, sample_course_pairing_data): + with patch("training.distill.call_teacher", return_value="not json"): + pairs = generate_explainer_pairs( + config=sample_school_config, + seed_data=[sample_course_pairing_data], + count=3, + ) + + assert len(pairs) == 0 + + +class TestGenerateSummarizerPairs: + def test_generates_pairs_from_seed_data(self, sample_school_config, sample_query_result_data): + mock_response = json.dumps({ + "summary": "Test summary", + "key_insights": ["insight1"], + "context": "context", + "action_items": 
["action"], + "caveats": ["caveat"], + }) + + with patch("training.distill.call_teacher", return_value=mock_response): + pairs = generate_summarizer_pairs( + config=sample_school_config, + seed_data=[sample_query_result_data], + count=2, + ) + + assert len(pairs) == 2 + assert "messages" in pairs[0] +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_distill.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'training.distill'` + +- [ ] **Step 3: Write the implementation** + +Create `training/distill.py`: +```python +"""Distillation pipeline — generate ChatML training pairs via a teacher model. + +Supports two backends: + - anthropic: Claude Sonnet via Anthropic API (production quality) + - ollama: Local model via Ollama (free iteration) + +Usage: + python -m training.distill --school bishop-state [--local] +""" + +from __future__ import annotations + +import argparse +import functools +import json +import os +import time +from pathlib import Path +from typing import Any + +from training.config import get_training_data_dir, load_school_config, write_jsonl +from training.prompts import ( + EXPLAINER_STUDENT_SYSTEM, + SUMMARIZER_STUDENT_SYSTEM, + build_explainer_prompt, + build_summarizer_prompt, + build_system_prompt, +) +from training.seed import ( + format_as_chatml, + generate_synthetic_course_pairings, + generate_synthetic_query_results, + load_seed_queries, +) + +# --------------------------------------------------------------------------- +# Cost tracking +# --------------------------------------------------------------------------- + +_COST_PER_M_INPUT = 3.00 +_COST_PER_M_OUTPUT = 15.00 +_total_input_tokens = 0 +_total_output_tokens = 0 +_total_calls = 0 + + +def _track_cost(input_tokens: int, output_tokens: int) -> None: + global _total_input_tokens, _total_output_tokens, _total_calls + _total_input_tokens += input_tokens + 
_total_output_tokens += output_tokens + _total_calls += 1 + + +def _cost_so_far() -> float: + return ( + _total_input_tokens / 1_000_000 * _COST_PER_M_INPUT + + _total_output_tokens / 1_000_000 * _COST_PER_M_OUTPUT + ) + + +def _print_cost_summary() -> None: + cost = _cost_so_far() + print( + f"[cost] {_total_calls} API calls | " + f"{_total_input_tokens:,} in + {_total_output_tokens:,} out tokens | " + f"${cost:.2f} spent so far", + flush=True, + ) + + +# --------------------------------------------------------------------------- +# JSON validation +# --------------------------------------------------------------------------- + + +def validate_json(text: str | None) -> dict | None: + """Strip markdown fences and parse as JSON dict. + + Returns None if text is empty, not valid JSON, or not a dict. + """ + if not text or not isinstance(text, str) or not text.strip(): + return None + + stripped = text.strip() + + if stripped.startswith("```"): + lines = stripped.splitlines() + lines = lines[1:] + if lines and lines[-1].strip() == "```": + lines = lines[:-1] + stripped = "\n".join(lines).strip() + + try: + obj = json.loads(stripped) + except (json.JSONDecodeError, ValueError): + return None + + if not isinstance(obj, dict): + return None + + return obj + + +# --------------------------------------------------------------------------- +# Teacher model caller +# --------------------------------------------------------------------------- + + +@functools.lru_cache(maxsize=1) +def _get_anthropic_client(): + """Return a cached Anthropic client instance.""" + import anthropic + + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + raise EnvironmentError( + "ANTHROPIC_API_KEY environment variable is required for Claude distillation." 
+ ) + return anthropic.Anthropic(api_key=api_key) + + +try: + import ollama +except ImportError: + ollama = None # type: ignore[assignment] + + +def call_teacher( + system: str, + user: str, + backend: str, + model: str, +) -> str: + """Call the teacher model and return the response text. + + Args: + system: System prompt. + user: User message. + backend: "anthropic" or "ollama". + model: Model identifier. + + Returns: + The assistant response as a string. + """ + preview = user[:120].replace("\n", " ") + print(f"[api] Calling {model} ({backend}) | {preview}...", flush=True) + + if backend == "anthropic": + client = _get_anthropic_client() + message = client.messages.create( + model=model, + max_tokens=2048, + system=system, + messages=[{"role": "user", "content": user}], + ) + usage = message.usage + _track_cost(usage.input_tokens, usage.output_tokens) + print(f"[api] done {usage.input_tokens}in/{usage.output_tokens}out tokens", flush=True) + if _total_calls % 10 == 0: + _print_cost_summary() + return message.content[0].text + + elif backend == "ollama": + if ollama is None: + raise ImportError("ollama package is required for local teacher. Install with: pip install ollama") + response = ollama.chat( + model=model, + messages=[ + {"role": "system", "content": system}, + {"role": "user", "content": user}, + ], + ) + return response["message"]["content"] + + else: + raise ValueError(f"Unknown backend: {backend!r}. Must be 'anthropic' or 'ollama'.") + + +# --------------------------------------------------------------------------- +# Pair generators +# --------------------------------------------------------------------------- + + +def generate_explainer_pairs( + config: dict[str, Any], + seed_data: list[dict[str, Any]], + count: int, + outfile: Path | None = None, +) -> list[dict]: + """Generate explainer training pairs via teacher model distillation. + + Args: + config: Parsed school config dict. + seed_data: List of course pairing data dicts. 
+ count: Number of pairs to generate. + outfile: If provided, pairs are written incrementally. + + Returns: + List of ChatML pair dicts. + """ + distill_config = config.get("distillation", {}) + backend = distill_config.get("teacher_backend", "anthropic") + model = distill_config.get("teacher_model", "claude-sonnet-4-20250514") + + system_prompt = build_system_prompt(config) + pairs: list[dict] = [] + + fh = None + if outfile is not None: + outfile.parent.mkdir(parents=True, exist_ok=True) + fh = outfile.open("w", encoding="utf-8") + + try: + for idx in range(count): + if idx > 0 and idx % 25 == 0: + time.sleep(1) + + course_data = seed_data[idx % len(seed_data)] + teacher_prompt = build_explainer_prompt(config, course_data) + + try: + response_text = call_teacher(system_prompt, teacher_prompt, backend, model) + except Exception as exc: + print(f"[warn] Teacher call failed for explainer pair {idx}: {exc}", flush=True) + continue + + validated = validate_json(response_text) + if validated is None: + print(f"[warn] Invalid JSON for explainer pair {idx}, skipping.", flush=True) + continue + + student_user = json.dumps(course_data, ensure_ascii=False, default=str) + pair = format_as_chatml( + system=EXPLAINER_STUDENT_SYSTEM, + user=student_user, + assistant=json.dumps(validated, ensure_ascii=False), + ) + pairs.append(pair) + if fh is not None: + fh.write(json.dumps(pair, ensure_ascii=False) + "\n") + fh.flush() + print(f"[explainer] {len(pairs)}/{count} pairs generated", flush=True) + finally: + if fh is not None: + fh.close() + print(f"[explainer] Saved {len(pairs)} pairs to {outfile}", flush=True) + + return pairs + + +def generate_summarizer_pairs( + config: dict[str, Any], + seed_data: list[dict[str, Any]], + count: int, + outfile: Path | None = None, +) -> list[dict]: + """Generate summarizer training pairs via teacher model distillation. + + Args: + config: Parsed school config dict. + seed_data: List of query result data dicts. 
+ count: Number of pairs to generate. + outfile: If provided, pairs are written incrementally. + + Returns: + List of ChatML pair dicts. + """ + distill_config = config.get("distillation", {}) + backend = distill_config.get("teacher_backend", "anthropic") + model = distill_config.get("teacher_model", "claude-sonnet-4-20250514") + + system_prompt = build_system_prompt(config) + pairs: list[dict] = [] + + fh = None + if outfile is not None: + outfile.parent.mkdir(parents=True, exist_ok=True) + fh = outfile.open("w", encoding="utf-8") + + try: + for idx in range(count): + if idx > 0 and idx % 25 == 0: + time.sleep(1) + + query_data = seed_data[idx % len(seed_data)] + teacher_prompt = build_summarizer_prompt(config, query_data) + + try: + response_text = call_teacher(system_prompt, teacher_prompt, backend, model) + except Exception as exc: + print(f"[warn] Teacher call failed for summarizer pair {idx}: {exc}", flush=True) + continue + + validated = validate_json(response_text) + if validated is None: + print(f"[warn] Invalid JSON for summarizer pair {idx}, skipping.", flush=True) + continue + + student_user = json.dumps( + {"prompt": query_data["prompt"], "data": query_data["data"][:50]}, + ensure_ascii=False, + default=str, + ) + pair = format_as_chatml( + system=SUMMARIZER_STUDENT_SYSTEM, + user=student_user, + assistant=json.dumps(validated, ensure_ascii=False), + ) + pairs.append(pair) + if fh is not None: + fh.write(json.dumps(pair, ensure_ascii=False) + "\n") + fh.flush() + print(f"[summarizer] {len(pairs)}/{count} pairs generated", flush=True) + finally: + if fh is not None: + fh.close() + print(f"[summarizer] Saved {len(pairs)} pairs to {outfile}", flush=True) + + return pairs + + +# --------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + + +def main(school: str, local: bool = False) -> None: + """Run distillation for a school. 
+ + Args: + school: School directory name. + local: If True, use local Ollama teacher instead of Claude. + """ + config = load_school_config(school) + + if local: + config["distillation"]["teacher_backend"] = config["distillation"].get( + "local_teacher_backend", "ollama" + ) + config["distillation"]["teacher_model"] = config["distillation"].get( + "local_teacher_model", "qwen3.5:27b" + ) + print(f"[distill] Using local teacher: {config['distillation']['teacher_model']}") + else: + print(f"[distill] Using API teacher: {config['distillation']['teacher_model']}") + + pairs_per_task = config["distillation"].get("pairs_per_task", 1500) + data_dir = get_training_data_dir(school) + pairs_dir = data_dir / "pairs" + + # Load seed queries + seed_queries = load_seed_queries(school) + + # Generate synthetic seed data + synthetic_pairings = generate_synthetic_course_pairings(config, count=pairs_per_task) + synthetic_results = generate_synthetic_query_results(config, count=pairs_per_task) + + # Explainer + print(f"\n{'='*60}") + print(f"EXPLAINER — generating {pairs_per_task} pairs") + print(f"{'='*60}") + explainer_pairs = generate_explainer_pairs( + config=config, + seed_data=synthetic_pairings, + count=pairs_per_task, + outfile=pairs_dir / "explainer.jsonl", + ) + + # Summarizer + print(f"\n{'='*60}") + print(f"SUMMARIZER — generating {pairs_per_task} pairs") + print(f"{'='*60}") + summarizer_pairs = generate_summarizer_pairs( + config=config, + seed_data=synthetic_results, + count=pairs_per_task, + outfile=pairs_dir / "summarizer.jsonl", + ) + + print(f"\n{'='*60}") + print("DISTILLATION COMPLETE") + print(f"{'='*60}") + print(f" Explainer: {len(explainer_pairs)} pairs") + print(f" Summarizer: {len(summarizer_pairs)} pairs") + _print_cost_summary() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate training pairs via teacher model distillation." 
+ ) + parser.add_argument("--school", required=True, help="School directory name") + parser.add_argument("--local", action="store_true", help="Use local Ollama teacher") + args = parser.parse_args() + main(args.school, local=args.local) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_distill.py -v` +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add training/distill.py tests/training/test_distill.py +git commit -m "feat(training): distillation pipeline with dual teacher backend support" +``` + +--- + +## Task 7: Dataset Preparation + +**Files:** +- Create: `training/prepare.py` +- Create: `tests/training/test_prepare.py` + +- [ ] **Step 1: Write the failing tests** + +Create `tests/training/test_prepare.py`: +```python +"""Tests for training.prepare — filter, deduplicate, and split.""" + +import json +import pytest + +from training.prepare import ( + filter_invalid_json, + deduplicate_by_jaccard, + jaccard_similarity, + split_dataset, +) + + +class TestFilterInvalidJson: + def test_keeps_valid_pairs(self): + pairs = [ + {"messages": [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "question"}, + {"role": "assistant", "content": '{"key": "value"}'}, + ]} + ] + result = filter_invalid_json(pairs) + assert len(result) == 1 + + def test_removes_invalid_json_assistant(self): + pairs = [ + {"messages": [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "question"}, + {"role": "assistant", "content": "not json"}, + ]} + ] + result = filter_invalid_json(pairs) + assert len(result) == 0 + + def test_removes_missing_messages(self): + assert filter_invalid_json([{"no_messages": True}]) == [] + + def test_removes_empty_user(self): + pairs = [ + {"messages": [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": ""}, + {"role": "assistant", "content": '{"key": "value"}'}, + 
]} + ] + result = filter_invalid_json(pairs) + assert len(result) == 0 + + +class TestJaccardSimilarity: + def test_identical_strings(self): + assert jaccard_similarity("hello world", "hello world") == 1.0 + + def test_completely_different(self): + assert jaccard_similarity("hello", "world") == 0.0 + + def test_partial_overlap(self): + result = jaccard_similarity("hello world foo", "hello world bar") + assert 0.0 < result < 1.0 + + def test_empty_string(self): + assert jaccard_similarity("", "hello") == 0.0 + + +class TestDeduplicateByJaccard: + def test_removes_exact_duplicates(self): + pairs = [ + {"messages": [{"role": "user", "content": "same question"}]}, + {"messages": [{"role": "user", "content": "same question"}]}, + {"messages": [{"role": "user", "content": "different question"}]}, + ] + result = deduplicate_by_jaccard(pairs, threshold=1.0) + assert len(result) == 2 + + def test_empty_input(self): + assert deduplicate_by_jaccard([], threshold=1.0) == [] + + def test_preserves_order(self): + pairs = [ + {"messages": [{"role": "user", "content": "first"}]}, + {"messages": [{"role": "user", "content": "second"}]}, + ] + result = deduplicate_by_jaccard(pairs, threshold=1.0) + assert result[0]["messages"][0]["content"] == "first" + + +class TestSplitDataset: + def test_split_ratios(self): + pairs = [{"id": i} for i in range(100)] + splits = split_dataset(pairs, train_ratio=0.8, val_ratio=0.1) + assert len(splits["train"]) == 80 + assert len(splits["val"]) == 10 + assert len(splits["test"]) == 10 + + def test_deterministic(self): + pairs = [{"id": i} for i in range(50)] + split1 = split_dataset(pairs, seed=42) + split2 = split_dataset(pairs, seed=42) + assert split1["train"] == split2["train"] + + def test_empty_input(self): + splits = split_dataset([]) + assert splits == {"train": [], "val": [], "test": []} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest 
tests/training/test_prepare.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'training.prepare'` + +- [ ] **Step 3: Write the implementation** + +Create `training/prepare.py`: +```python +"""Dataset preparation — filter, deduplicate, and split training pairs. + +Adapted from d4bl pipeline. Loads raw JSONL from distillation, applies +quality filtering, removes near-duplicates, and writes 80/10/10 splits. + +Usage: + python -m training.prepare --school bishop-state +""" + +from __future__ import annotations + +import argparse +import json +import random +from pathlib import Path +from typing import Any + +from training.config import ( + JACCARD_THRESHOLD, + TRAIN_RATIO, + VAL_RATIO, + get_training_data_dir, + write_jsonl, +) + + +# --------------------------------------------------------------------------- +# Pure helpers +# --------------------------------------------------------------------------- + + +def jaccard_similarity(a: str, b: str) -> float: + """Compute word-level Jaccard similarity between two strings.""" + words_a = set(a.lower().split()) + words_b = set(b.lower().split()) + if not words_a or not words_b: + return 0.0 + return len(words_a & words_b) / len(words_a | words_b) + + +def _get_user_text(pair: dict[str, Any]) -> str: + """Extract user message content from a ChatML pair.""" + for msg in pair.get("messages", []): + if msg.get("role") == "user": + return msg.get("content", "") + return "" + + +# --------------------------------------------------------------------------- +# Filtering +# --------------------------------------------------------------------------- + + +def filter_invalid_json(pairs: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Keep only pairs with valid structure and JSON-parseable assistant content.""" + valid = [] + for pair in pairs: + messages = pair.get("messages") + if not isinstance(messages, list) or not messages: + continue + if any(not isinstance(msg, dict) for msg in messages): + continue + has_user = 
any( + msg.get("role") == "user" and msg.get("content") + for msg in messages + ) + if not has_user: + continue + assistant_content = None + for msg in messages: + if msg.get("role") == "assistant": + assistant_content = msg.get("content") + break + if not isinstance(assistant_content, str) or not assistant_content: + continue + try: + json.loads(assistant_content) + except (json.JSONDecodeError, ValueError): + continue + valid.append(pair) + return valid + + +# --------------------------------------------------------------------------- +# Deduplication +# --------------------------------------------------------------------------- + + +def deduplicate_by_jaccard( + pairs: list[dict[str, Any]], + threshold: float = JACCARD_THRESHOLD, +) -> list[dict[str, Any]]: + """Remove near-duplicate pairs based on user-message Jaccard similarity.""" + if not pairs: + return pairs + + kept: list[dict[str, Any]] = [pairs[0]] + kept_word_sets: list[set] = [set(_get_user_text(pairs[0]).lower().split())] + + for pair in pairs[1:]: + candidate_words = set(_get_user_text(pair).lower().split()) + is_duplicate = any( + _jaccard_sets(candidate_words, kw) >= threshold + for kw in kept_word_sets + ) + if not is_duplicate: + kept.append(pair) + kept_word_sets.append(candidate_words) + + return kept + + +def _jaccard_sets(a: set, b: set) -> float: + if not a or not b: + return 0.0 + return len(a & b) / len(a | b) + + +# --------------------------------------------------------------------------- +# Splitting +# --------------------------------------------------------------------------- + + +def split_dataset( + pairs: list[dict[str, Any]], + train_ratio: float = TRAIN_RATIO, + val_ratio: float = VAL_RATIO, + seed: int = 42, +) -> dict[str, list[dict[str, Any]]]: + """Shuffle and split pairs into train/val/test with a deterministic seed.""" + if not pairs: + return {"train": [], "val": [], "test": []} + + shuffled = list(pairs) + rng = random.Random(seed) + rng.shuffle(shuffled) + + n = 
len(shuffled) + train_end = round(n * train_ratio) + val_end = train_end + round(n * val_ratio) + + return { + "train": shuffled[:train_end], + "val": shuffled[train_end:val_end], + "test": shuffled[val_end:], + } + + +# --------------------------------------------------------------------------- +# I/O +# --------------------------------------------------------------------------- + + +def _load_pairs(path: Path) -> list[dict[str, Any]]: + """Load newline-delimited JSON from path.""" + pairs = [] + with path.open() as fh: + for line in fh: + line = line.strip() + if line: + pairs.append(json.loads(line)) + return pairs + + +# --------------------------------------------------------------------------- +# Orchestrator +# --------------------------------------------------------------------------- + + +def process_task(school: str, task: str) -> dict[str, int]: + """Load, filter, deduplicate, and split training data for a task. + + Args: + school: School directory name. + task: Task name ("explainer" or "summarizer"). + + Returns: + Dict mapping split name to number of examples written. 
+ """ + data_dir = get_training_data_dir(school) + input_path = data_dir / "pairs" / f"{task}.jsonl" + if not input_path.exists(): + raise FileNotFoundError(f"Pairs file not found: {input_path}") + + pairs = _load_pairs(input_path) + print(f"[{task}] Loaded {len(pairs)} pairs from {input_path}") + + pairs = filter_invalid_json(pairs) + print(f"[{task}] After JSON filter: {len(pairs)} pairs") + + pairs = deduplicate_by_jaccard(pairs, threshold=JACCARD_THRESHOLD) + print(f"[{task}] After deduplication: {len(pairs)} pairs") + + splits = split_dataset(pairs) + + final_dir = data_dir / "final" / task + counts: dict[str, int] = {} + for split_name, split_pairs in splits.items(): + out_path = final_dir / f"{split_name}.jsonl" + n = write_jsonl(split_pairs, out_path) + counts[split_name] = n + print(f"[{task}] Wrote {n} examples to {out_path}") + + return counts + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main(school: str) -> None: + """Run preparation for all tasks.""" + for task in ("explainer", "summarizer"): + try: + process_task(school, task) + except FileNotFoundError as e: + print(f"[warn] {e} — skipping") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Filter, deduplicate, and split training pairs.") + parser.add_argument("--school", required=True, help="School directory name") + args = parser.parse_args() + main(args.school) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_prepare.py -v` +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add training/prepare.py tests/training/test_prepare.py +git commit -m "feat(training): dataset preparation — filter, dedup, and split" +``` + +--- + +## Task 8: Eval Harness and Ship Criteria + +**Files:** +- Create: 
`training/eval.py` +- Create: `tests/training/test_eval.py` + +- [ ] **Step 1: Write the failing tests** + +Create `tests/training/test_eval.py`: +```python +"""Tests for training.eval — metrics and ship criteria.""" + +import json +import pytest + +from training.eval import ( + SHIP_CRITERIA, + check_json_validity, + check_schema_adherence, + check_caveat_inclusion, + check_ship_criteria, + ShipDecision, +) + + +class TestCheckJsonValidity: + def test_all_valid(self): + outputs = ['{"key": "value"}', '{"a": 1}'] + assert check_json_validity(outputs) == 1.0 + + def test_some_invalid(self): + outputs = ['{"key": "value"}', "not json", '{"a": 1}'] + assert check_json_validity(outputs) == pytest.approx(2 / 3) + + def test_empty(self): + assert check_json_validity([]) == 0.0 + + +class TestCheckSchemaAdherence: + def test_explainer_all_valid(self, sample_explainer_output): + outputs = [json.dumps(sample_explainer_output)] + assert check_schema_adherence(outputs, "explainer") == 1.0 + + def test_explainer_missing_key(self): + incomplete = json.dumps({"explanation": "test"}) + assert check_schema_adherence([incomplete], "explainer") < 1.0 + + def test_summarizer_all_valid(self, sample_summarizer_output): + outputs = [json.dumps(sample_summarizer_output)] + assert check_schema_adherence(outputs, "summarizer") == 1.0 + + +class TestCheckCaveatInclusion: + def test_all_have_caveats(self, sample_explainer_output): + outputs = [json.dumps(sample_explainer_output)] + assert check_caveat_inclusion(outputs, "explainer") == 1.0 + + def test_missing_caveats(self): + no_caveats = json.dumps({ + "explanation": "test", + "structural_factors": [], + "student_impact": "impact", + "advisor_recommendation": "rec", + "data_limitations": [], + "related_intervention": None, + }) + assert check_caveat_inclusion([no_caveats], "explainer") == 0.0 + + +class TestShipCriteria: + def test_passes_with_good_metrics(self): + metrics = { + "json_validity": 0.98, + "schema_adherence": 0.95, + 
"caveat_inclusion": 0.92, + "factual_grounding": 0.90, + } + decision = check_ship_criteria(metrics, "explainer") + assert decision.decision == "ship" + assert len(decision.blocking_failures) == 0 + + def test_fails_with_low_json_validity(self): + metrics = { + "json_validity": 0.80, + "schema_adherence": 0.95, + "caveat_inclusion": 0.92, + "factual_grounding": 0.90, + } + decision = check_ship_criteria(metrics, "explainer") + assert decision.decision == "no_ship" + assert len(decision.blocking_failures) > 0 + + def test_ship_with_gaps(self): + metrics = { + "json_validity": 0.98, + "schema_adherence": 0.95, + "caveat_inclusion": 0.85, + "factual_grounding": 0.90, + "explanation_quality": 0.30, # Below non-blocking threshold + } + decision = check_ship_criteria(metrics, "explainer") + assert decision.decision in ("ship", "ship_with_gaps") +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_eval.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'training.eval'` + +- [ ] **Step 3: Write the implementation** + +Create `training/eval.py`: +```python +"""Evaluation harness and ship criteria for fine-tuned models. + +Runs a fine-tuned model against held-out test data and checks +whether it meets the minimum quality thresholds for deployment. 
+ +Usage: + python -m training.eval --school bishop-state +""" + +from __future__ import annotations + +import argparse +import json +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +from training.config import get_training_data_dir, load_school_config + +# --------------------------------------------------------------------------- +# Ship criteria thresholds +# --------------------------------------------------------------------------- + +SHIP_CRITERIA: dict[str, dict[str, dict]] = { + "explainer": { + "json_validity": {"min": 0.95, "blocking": True}, + "schema_adherence": {"min": 0.90, "blocking": True}, + "caveat_inclusion": {"min": 0.90, "blocking": True}, + "factual_grounding": {"min": 0.85, "blocking": True}, + "explanation_quality": {"min": 0.35, "blocking": False}, + "actionability": {"min": 0.80, "blocking": False}, + }, + "summarizer": { + "json_validity": {"min": 0.95, "blocking": True}, + "schema_adherence": {"min": 0.90, "blocking": True}, + "caveat_inclusion": {"min": 0.90, "blocking": True}, + "factual_grounding": {"min": 0.85, "blocking": True}, + "explanation_quality": {"min": 0.35, "blocking": False}, + "actionability": {"min": 0.80, "blocking": False}, + }, +} + +_EXPLAINER_REQUIRED_KEYS = { + "explanation", "structural_factors", "student_impact", + "advisor_recommendation", "data_limitations", "related_intervention", +} +_SUMMARIZER_REQUIRED_KEYS = { + "summary", "key_insights", "context", "action_items", "caveats", +} +_CAVEAT_KEY = { + "explainer": "data_limitations", + "summarizer": "caveats", +} + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + + +@dataclass +class CriterionFailure: + metric: str + threshold: float + actual: float | None + blocking: bool + + +@dataclass +class ShipDecision: + decision: str # "ship", "no_ship", "ship_with_gaps" + blocking_failures: 
list[CriterionFailure] = field(default_factory=list) + nonblocking_failures: list[CriterionFailure] = field(default_factory=list) + metrics_checked: int = 0 + + +# --------------------------------------------------------------------------- +# Metric computation +# --------------------------------------------------------------------------- + + +def check_json_validity(outputs: list[str]) -> float: + """Compute the fraction of outputs that parse as valid JSON dicts.""" + if not outputs: + return 0.0 + valid = 0 + for out in outputs: + try: + obj = json.loads(out) + if isinstance(obj, dict): + valid += 1 + except (json.JSONDecodeError, ValueError, TypeError): + pass + return valid / len(outputs) + + +def check_schema_adherence(outputs: list[str], task: str) -> float: + """Compute the fraction of outputs with all required keys present.""" + if not outputs: + return 0.0 + + required = _EXPLAINER_REQUIRED_KEYS if task == "explainer" else _SUMMARIZER_REQUIRED_KEYS + adherent = 0 + for out in outputs: + try: + obj = json.loads(out) + if isinstance(obj, dict) and required.issubset(obj.keys()): + adherent += 1 + except (json.JSONDecodeError, ValueError, TypeError): + pass + return adherent / len(outputs) + + +def check_caveat_inclusion(outputs: list[str], task: str) -> float: + """Compute the fraction of outputs with non-empty caveat/limitation fields.""" + if not outputs: + return 0.0 + + caveat_key = _CAVEAT_KEY.get(task, "caveats") + with_caveats = 0 + for out in outputs: + try: + obj = json.loads(out) + if isinstance(obj, dict): + caveats = obj.get(caveat_key, []) + if isinstance(caveats, list) and len(caveats) > 0: + with_caveats += 1 + except (json.JSONDecodeError, ValueError, TypeError): + pass + return with_caveats / len(outputs) + + +def check_factual_grounding(outputs: list[str], inputs: list[str]) -> float: + """Check that outputs reference values present in their corresponding inputs. 
+ + Simple heuristic: extracts numeric values from the input and checks + that at least one appears in the output. + """ + if not outputs or not inputs: + return 0.0 + + import re + + grounded = 0 + for out, inp in zip(outputs, inputs): + numbers_in_input = set(re.findall(r"\d+\.?\d*", inp)) + if not numbers_in_input: + grounded += 1 # No numbers to check against + continue + # Check if at least one input number appears in the output + if any(n in out for n in numbers_in_input): + grounded += 1 + + return grounded / len(outputs) + + +# --------------------------------------------------------------------------- +# Ship criteria checker +# --------------------------------------------------------------------------- + + +def check_ship_criteria( + metrics: dict[str, float], + task: str, +) -> ShipDecision: + """Compare metrics against ship thresholds. + + Args: + metrics: Dict of metric_name → value. + task: "explainer" or "summarizer". + + Returns: + ShipDecision with pass/fail details. + """ + criteria = SHIP_CRITERIA.get(task, {}) + blocking_failures = [] + nonblocking_failures = [] + checked = 0 + + for metric_name, spec in criteria.items(): + actual = metrics.get(metric_name) + if actual is None: + continue + checked += 1 + + threshold = spec.get("min", spec.get("max")) + blocking = spec.get("blocking", True) + + failed = False + if "min" in spec and actual < spec["min"]: + failed = True + if "max" in spec and actual > spec["max"]: + failed = True + + if failed: + failure = CriterionFailure( + metric=metric_name, + threshold=threshold, + actual=actual, + blocking=blocking, + ) + if blocking: + blocking_failures.append(failure) + else: + nonblocking_failures.append(failure) + + if blocking_failures: + decision = "no_ship" + elif nonblocking_failures: + decision = "ship_with_gaps" + else: + decision = "ship" + + return ShipDecision( + decision=decision, + blocking_failures=blocking_failures, + nonblocking_failures=nonblocking_failures, + metrics_checked=checked, + ) 
+ + +# --------------------------------------------------------------------------- +# Test set loader +# --------------------------------------------------------------------------- + + +def load_test_set(path: Path) -> list[dict]: + """Load a ChatML JSONL test set and extract input/expected pairs.""" + results = [] + with path.open() as fh: + for line in fh: + if not line.strip(): + continue + example = json.loads(line) + messages = example["messages"] + user_msg = messages[1]["content"] + assistant_msg = messages[2]["content"] + results.append({ + "input": user_msg, + "expected_raw": assistant_msg, + }) + return results + + +# --------------------------------------------------------------------------- +# Eval runner +# --------------------------------------------------------------------------- + + +def run_eval(school: str, task: str) -> ShipDecision: + """Run evaluation for a school's fine-tuned model on one task. + + Loads the test set, runs inference via Ollama, computes metrics, + and checks ship criteria. + + Args: + school: School directory name. + task: "explainer" or "summarizer". + + Returns: + ShipDecision. + """ + data_dir = get_training_data_dir(school) + test_path = data_dir / "final" / task / "test.jsonl" + + if not test_path.exists(): + raise FileNotFoundError(f"Test set not found: {test_path}") + + test_set = load_test_set(test_path) + print(f"[{task}] Loaded {len(test_set)} test examples from {test_path}") + + config = load_school_config(school) + model_name = f"{school}-{task}:{config['training']['default_model'].split(':')[1]}" + + # Run inference + try: + import ollama as ollama_client + except ImportError: + raise ImportError("ollama package required for evaluation. 
Install with: pip install ollama")

    outputs = []
    inputs = []
    for i, example in enumerate(test_set):
        try:
            response = ollama_client.chat(
                model=model_name,
                messages=[
                    {"role": "user", "content": example["input"]},
                ],
            )
            outputs.append(response["message"]["content"])
            inputs.append(example["input"])
        except Exception as exc:
            print(f"[warn] Inference failed for example {i}: {exc}")
            outputs.append("")
            inputs.append(example["input"])

        if (i + 1) % 10 == 0:
            print(f"[{task}] Evaluated {i + 1}/{len(test_set)} examples", flush=True)

    # Compute metrics
    metrics = {
        "json_validity": check_json_validity(outputs),
        "schema_adherence": check_schema_adherence(outputs, task),
        "caveat_inclusion": check_caveat_inclusion(outputs, task),
        "factual_grounding": check_factual_grounding(outputs, inputs),
    }

    # Print results
    print(f"\n[{task}] Metrics:")
    for name, value in metrics.items():
        threshold_info = SHIP_CRITERIA.get(task, {}).get(name, {})
        threshold = threshold_info.get("min", threshold_info.get("max", "?"))
        if isinstance(threshold, (int, float)):
            status = "PASS" if value >= threshold else "FAIL"
        else:
            status = "PASS"  # No numeric threshold defined for this metric
        print(f"  {name}: {value:.1%} (threshold: {threshold}) {status}")

    decision = check_ship_criteria(metrics, task)
    print(f"\n[{task}] DECISION: {decision.decision.upper()}")
    if decision.blocking_failures:
        for f in decision.blocking_failures:
            print(f"  BLOCKING: {f.metric} = {f.actual:.1%} (need {f.threshold})")
    if decision.nonblocking_failures:
        for f in decision.nonblocking_failures:
            print(f"  WARNING: {f.metric} = {f.actual:.1%} (need {f.threshold})")

    return decision


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------


def main(school: str) -> None:
    """Run evaluation for all tasks."""
    results = {}
    for task in ("explainer", "summarizer"):
        try:
            results[task] = run_eval(school, 
task) + except FileNotFoundError as e: + print(f"[warn] {e} — skipping") + + print(f"\n{'='*60}") + print("EVALUATION SUMMARY") + print(f"{'='*60}") + all_ship = True + for task, decision in results.items(): + status = decision.decision.upper() + print(f" {task}: {status}") + if decision.decision == "no_ship": + all_ship = False + + if all_ship: + print("\nAll adapters PASS — ready to export.") + else: + print("\nSome adapters FAILED — fix issues before exporting.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Evaluate fine-tuned models against ship criteria.") + parser.add_argument("--school", required=True, help="School directory name") + args = parser.parse_args() + main(args.school) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/training/test_eval.py -v` +Expected: All tests PASS + +- [ ] **Step 5: Commit** + +```bash +git add training/eval.py tests/training/test_eval.py +git commit -m "feat(training): eval harness with ship criteria for model quality gates" +``` + +--- + +## Task 9: MLX Fine-Tuning Wrapper + +**Files:** +- Create: `training/finetune.py` + +This task wraps MLX's `mlx_lm` fine-tuning CLI. No unit tests for the actual training (it requires GPU time), but we test the config generation. + +- [ ] **Step 1: Write the implementation** + +Create `training/finetune.py`: +```python +"""Fine-tuning wrapper for MLX QLoRA on Apple Silicon. + +Wraps mlx_lm's LoRA fine-tuning with school-specific config. 
+ +Usage: + python -m training.finetune --school bishop-state --model 9b +""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from pathlib import Path + +from training.config import get_training_data_dir, load_school_config + +# --------------------------------------------------------------------------- +# Model name mapping +# --------------------------------------------------------------------------- + +_MODEL_MAP = { + "4b": "Qwen/Qwen3.5-4B", + "9b": "Qwen/Qwen3.5-9B", + "27b": "Qwen/Qwen3.5-27B", +} + + +def _resolve_model(model_shorthand: str) -> str: + """Resolve a shorthand like '9b' to a HuggingFace model path.""" + if model_shorthand in _MODEL_MAP: + return _MODEL_MAP[model_shorthand] + return model_shorthand + + +# --------------------------------------------------------------------------- +# Config generation +# --------------------------------------------------------------------------- + + +def build_lora_config(config: dict, task: str, data_dir: Path) -> dict: + """Build the MLX LoRA fine-tuning config dict. + + Args: + config: Parsed school config. + task: "explainer" or "summarizer". + data_dir: Path to the school's training_data directory. + + Returns: + Dict suitable for writing as JSON config for mlx_lm.lora. 
+ """ + training = config.get("training", {}) + final_dir = data_dir / "final" / task + + return { + "train": str(final_dir / "train.jsonl"), + "valid": str(final_dir / "val.jsonl"), + "test": str(final_dir / "test.jsonl"), + "lora_layers": training.get("lora_rank", 16), + "lora_parameters": { + "rank": training.get("lora_rank", 16), + "alpha": training.get("lora_alpha", 32), + "dropout": 0.05, + "scale": training.get("lora_alpha", 32) / training.get("lora_rank", 16), + }, + "learning_rate": training.get("learning_rate", 1e-4), + "batch_size": training.get("batch_size", 4), + "iters": training.get("epochs", 3) * 1000, # Approximate + "val_batches": 25, + "steps_per_eval": training.get("eval_every", 50), + "save_every": 100, + "max_seq_length": 2048, + "grad_checkpoint": True, + } + + +# --------------------------------------------------------------------------- +# Fine-tuning runner +# --------------------------------------------------------------------------- + + +def run_finetune(school: str, model: str = "9b", task: str | None = None) -> None: + """Run MLX LoRA fine-tuning for a school's adapter(s). + + Args: + school: School directory name. + model: Model shorthand ("4b", "9b") or full HF path. + task: Specific task, or None to train both adapters. 
+ """ + config = load_school_config(school) + data_dir = get_training_data_dir(school) + hf_model = _resolve_model(model) + + tasks = [task] if task else ["explainer", "summarizer"] + + for t in tasks: + print(f"\n{'='*60}") + print(f"FINE-TUNING: {t} adapter on {hf_model}") + print(f"{'='*60}") + + adapter_dir = data_dir / "models" / f"qwen3.5-{model}" / t + adapter_dir.mkdir(parents=True, exist_ok=True) + + lora_config = build_lora_config(config, t, data_dir) + config_path = adapter_dir / "lora_config.json" + config_path.write_text(json.dumps(lora_config, indent=2)) + + cmd = [ + sys.executable, "-m", "mlx_lm.lora", + "--model", hf_model, + "--adapter-path", str(adapter_dir), + "--data", str(data_dir / "final" / t), + "--train", + "--batch-size", str(lora_config["batch_size"]), + "--lora-layers", str(lora_config["lora_layers"]), + "--iters", str(lora_config["iters"]), + "--val-batches", str(lora_config["val_batches"]), + "--steps-per-eval", str(lora_config["steps_per_eval"]), + "--save-every", str(lora_config["save_every"]), + "--learning-rate", str(lora_config["learning_rate"]), + "--max-seq-length", str(lora_config["max_seq_length"]), + "--grad-checkpoint", + ] + + print(f"[finetune] Running: {' '.join(cmd[:6])}...") + result = subprocess.run(cmd, cwd=str(data_dir)) + + if result.returncode != 0: + print(f"[finetune] FAILED for {t} — exit code {result.returncode}") + else: + print(f"[finetune] SUCCESS — adapter saved to {adapter_dir}") + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Fine-tune a model for a school via MLX QLoRA.") + parser.add_argument("--school", required=True, help="School directory name") + parser.add_argument("--model", default="9b", help="Model size: 4b, 9b, or HF path") + parser.add_argument("--task", choices=["explainer", "summarizer"], 
help="Train one adapter only") + args = parser.parse_args() + run_finetune(args.school, model=args.model, task=args.task) +``` + +- [ ] **Step 2: Commit** + +```bash +git add training/finetune.py +git commit -m "feat(training): MLX QLoRA fine-tuning wrapper" +``` + +--- + +## Task 10: Ollama Export + +**Files:** +- Create: `training/export.py` + +- [ ] **Step 1: Write the implementation** + +Create `training/export.py`: +```python +"""Export fine-tuned adapters to Ollama for serving. + +Creates an Ollama Modelfile and registers the model. + +Usage: + python -m training.export --school bishop-state +""" + +from __future__ import annotations + +import argparse +import subprocess +import sys +from pathlib import Path + +from training.config import get_training_data_dir, load_school_config + +# --------------------------------------------------------------------------- +# Modelfile generation +# --------------------------------------------------------------------------- + +_MODELFILE_TEMPLATE = """FROM {base_model} +ADAPTER {adapter_path} + +PARAMETER temperature 0.3 +PARAMETER top_p 0.9 +PARAMETER num_ctx 4096 + +SYSTEM {system_prompt} +""" + + +def generate_modelfile( + base_model: str, + adapter_path: Path, + system_prompt: str, +) -> str: + """Generate an Ollama Modelfile string. + + Args: + base_model: Base model name (e.g. "qwen3.5:9b"). + adapter_path: Path to the LoRA adapter directory. + system_prompt: System prompt to bake into the model. + + Returns: + Modelfile content string. 
+ """ + return _MODELFILE_TEMPLATE.format( + base_model=base_model, + adapter_path=str(adapter_path), + system_prompt=json.dumps(system_prompt), + ) + + +# --------------------------------------------------------------------------- +# Registration +# --------------------------------------------------------------------------- + +import json + +from training.prompts import EXPLAINER_STUDENT_SYSTEM, SUMMARIZER_STUDENT_SYSTEM + +_SYSTEM_PROMPTS = { + "explainer": EXPLAINER_STUDENT_SYSTEM, + "summarizer": SUMMARIZER_STUDENT_SYSTEM, +} + + +def export_model(school: str, task: str, model: str = "9b") -> bool: + """Export a fine-tuned adapter to Ollama. + + Args: + school: School directory name. + task: "explainer" or "summarizer". + model: Model size shorthand. + + Returns: + True if registration succeeded. + """ + data_dir = get_training_data_dir(school) + adapter_dir = data_dir / "models" / f"qwen3.5-{model}" / task + + if not adapter_dir.exists(): + print(f"[export] Adapter not found: {adapter_dir}") + return False + + base_model = f"qwen3.5:{model}" + ollama_name = f"{school}-{task}:{model}" + system_prompt = _SYSTEM_PROMPTS.get(task, "") + + modelfile_content = generate_modelfile(base_model, adapter_dir, system_prompt) + modelfile_path = adapter_dir / "Modelfile" + modelfile_path.write_text(modelfile_content) + print(f"[export] Wrote Modelfile to {modelfile_path}") + + # Register with Ollama + cmd = ["ollama", "create", ollama_name, "-f", str(modelfile_path)] + print(f"[export] Registering: {' '.join(cmd)}") + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) + if result.returncode == 0: + print(f"[export] Registered: {ollama_name}") + return True + else: + print(f"[export] FAILED: {result.stderr}") + return False + except FileNotFoundError: + print("[export] Ollama CLI not found. 
Install from https://ollama.com") + return False + except subprocess.TimeoutExpired: + print("[export] Ollama create timed out after 5 minutes") + return False + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main(school: str) -> None: + """Export all adapters for a school.""" + config = load_school_config(school) + model = config["training"]["default_model"].split(":")[1] + + results = {} + for task in ("explainer", "summarizer"): + results[task] = export_model(school, task, model=model) + + print(f"\n{'='*60}") + print("EXPORT SUMMARY") + print(f"{'='*60}") + for task, success in results.items(): + status = "OK" if success else "FAILED" + print(f" {school}-{task}:{model} — {status}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Export fine-tuned models to Ollama.") + parser.add_argument("--school", required=True, help="School directory name") + args = parser.parse_args() + main(args.school) +``` + +- [ ] **Step 2: Commit** + +```bash +git add training/export.py +git commit -m "feat(training): Ollama model export and registration" +``` + +--- + +## Task 11: Dashboard Model Client + +**Files:** +- Create: `codebenders-dashboard/lib/model-client.ts` + +- [ ] **Step 1: Write the implementation** + +Create `codebenders-dashboard/lib/model-client.ts`: +```typescript +/** + * Model client adapter — routes inference to Ollama (fine-tuned) or + * OpenAI (fallback) based on MODEL_BACKEND env var. 
+ */
+
+import { generateText } from "ai"
+import { createOpenAI } from "@ai-sdk/openai"
+
+const MODEL_BACKEND = process.env.MODEL_BACKEND || "openai"
+const SCHOOL_CODE = process.env.SCHOOL_CODE || "bishop-state"
+const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL || "http://localhost:11434"
+
+const openai = createOpenAI({
+  apiKey: process.env.OPENAI_API_KEY || "",
+})
+
+interface ModelResponse {
+  text: string
+}
+
+async function callOllama(model: string, prompt: string): Promise<string> {
+  const response = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model,
+      prompt,
+      stream: false,
+      options: {
+        temperature: 0.3,
+        num_predict: 1024,
+      },
+    }),
+  })
+
+  if (!response.ok) {
+    throw new Error(`Ollama error: ${response.status} ${response.statusText}`)
+  }
+
+  const data = await response.json()
+  return data.response
+}
+
+async function callOpenAI(prompt: string, maxTokens: number): Promise<string> {
+  const result = await generateText({
+    model: openai("gpt-4o-mini"),
+    prompt,
+    maxTokens,
+  })
+  return result.text
+}
+
+/**
+ * Generate a course pairing explanation.
+ *
+ * Routes to the school's fine-tuned explainer model via Ollama,
+ * or falls back to OpenAI GPT-4o-mini.
+ */
+export async function generateExplanation(
+  prompt: string,
+  maxTokens: number = 320,
+): Promise<string> {
+  if (MODEL_BACKEND === "ollama") {
+    const modelSize = process.env.MODEL_SIZE || "9b"
+    const model = `${SCHOOL_CODE}-explainer:${modelSize}`
+    return callOllama(model, prompt)
+  }
+  return callOpenAI(prompt, maxTokens)
+}
+
+/**
+ * Generate a query result summary.
+ *
+ * Routes to the school's fine-tuned summarizer model via Ollama,
+ * or falls back to OpenAI GPT-4o-mini. 
+ */ +export async function generateSummary( + prompt: string, + maxTokens: number = 200, +): Promise { + if (MODEL_BACKEND === "ollama") { + const modelSize = process.env.MODEL_SIZE || "9b" + const model = `${SCHOOL_CODE}-summarizer:${modelSize}` + return callOllama(model, prompt) + } + return callOpenAI(prompt, maxTokens) +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add codebenders-dashboard/lib/model-client.ts +git commit -m "feat(dashboard): model client adapter for Ollama/OpenAI routing" +``` + +--- + +## Task 12: Integrate Model Client into API Routes + +**Files:** +- Modify: `codebenders-dashboard/app/api/courses/explain-pairing/route.ts` +- Modify: `codebenders-dashboard/app/api/query-summary/route.ts` + +- [ ] **Step 1: Update explain-pairing route** + +In `codebenders-dashboard/app/api/courses/explain-pairing/route.ts`, replace the inline OpenAI call with the model client. + +Find the import section and add: +```typescript +import { generateExplanation } from "@/lib/model-client" +``` + +Find the `generateText` call block (approximately lines 192-196) and replace: +```typescript +// Before: +const { text } = await generateText({ + model: openai("gpt-4o-mini"), + prompt: llmPrompt, + maxTokens: 320, +}) + +// After: +const text = await generateExplanation(llmPrompt, 320) +``` + +Remove the now-unused inline OpenAI client imports if they become unreferenced after this change. + +- [ ] **Step 2: Update query-summary route** + +In `codebenders-dashboard/app/api/query-summary/route.ts`, replace the inline OpenAI call with the model client. + +Add import: +```typescript +import { generateSummary } from "@/lib/model-client" +``` + +Find the `generateText` call (approximately lines 50-54) and replace: +```typescript +// Before: +const { text } = await generateText({ + model: openai("gpt-4o-mini"), + prompt: llmPrompt, + maxTokens: 200, +}) + +// After: +const text = await generateSummary(llmPrompt, 200) +``` + +Remove unused inline OpenAI client imports. 
+ +- [ ] **Step 3: Verify dashboard builds** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon/codebenders-dashboard && npm run build` +Expected: Build succeeds with no TypeScript errors + +- [ ] **Step 4: Commit** + +```bash +git add codebenders-dashboard/app/api/courses/explain-pairing/route.ts \ + codebenders-dashboard/app/api/query-summary/route.ts +git commit -m "feat(dashboard): route explain-pairing and query-summary through model client" +``` + +--- + +## Task 13: Run All Tests and Final Verification + +- [ ] **Step 1: Run full Python test suite** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -m pytest tests/ -v` +Expected: All tests PASS + +- [ ] **Step 2: Verify dashboard builds** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon/codebenders-dashboard && npm run build` +Expected: Build succeeds + +- [ ] **Step 3: Verify pipeline CLI entry points** + +Run: +```bash +cd /Users/william-meroxa/Development/codebenders-datathon +venv/bin/python -m training.distill --help +venv/bin/python -m training.prepare --help +venv/bin/python -m training.finetune --help +venv/bin/python -m training.eval --help +venv/bin/python -m training.export --help +``` +Expected: Each prints usage without errors + +- [ ] **Step 4: Verify config loads end-to-end** + +Run: `cd /Users/william-meroxa/Development/codebenders-datathon && venv/bin/python -c "from training.config import load_school_config; c = load_school_config('bishop-state'); print(f'School: {c[\"school\"][\"name\"]}'); print(f'Programs: {len(c[\"domain\"][\"programs\"])}'); print(f'Student columns: {len(c[\"schema\"][\"student_columns\"])}'); print(f'Course columns: {len(c[\"schema\"][\"course_columns\"])}')"` +Expected: Prints school name, program count, and column counts without errors From 29af1058a06b9dcd0276ea153e49da46b666bfb9 Mon Sep 17 00:00:00 2001 From: William Hill Date: Wed, 1 Apr 2026 19:33:38 -0400 Subject: [PATCH 05/15] docs: add 
design spec for SIS deep-link feature (#78) --- .../specs/2026-04-01-sis-deep-link-design.md | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-01-sis-deep-link-design.md diff --git a/docs/superpowers/specs/2026-04-01-sis-deep-link-design.md b/docs/superpowers/specs/2026-04-01-sis-deep-link-design.md new file mode 100644 index 0000000..c26a8e5 --- /dev/null +++ b/docs/superpowers/specs/2026-04-01-sis-deep-link-design.md @@ -0,0 +1,119 @@ +# SIS Deep-Link from Student Detail View — Design Spec + +**Date:** 2026-04-01 +**Issue:** #78 +**Scope:** Proof of concept / demo +**Branch:** `feature/sis-deep-link` (from `main`) + +## Summary + +Add a FERPA-compliant "Open in SIS" button to the student detail page that constructs a deep-link URL to the institution's Student Information System. Identity resolution happens server-side — the browser never receives the SIS student ID. This POC validates the architecture with sample data and a configurable demo URL. + +## Architecture + +``` +Browser (student detail page) + │ + ├─ GET /api/students/[guid]/sis-link + │ │ + │ ├─ Role check (x-user-role header, admin/advisor/ir only) + │ ├─ Query guid_sis_map table for sis_id + │ ├─ Build URL: SIS_BASE_URL?SIS_ID_PARAM= + │ ├─ Append audit log entry (GUID + role, never sis_id) + │ └─ Return { url } or 404 + │ + └─ window.open(url, "_blank") +``` + +The SIS ID never reaches the client. The audit log records access by GUID and role only. + +## 1. Database — `guid_sis_map` Table + +Table in the existing Postgres database: + +```sql +CREATE TABLE guid_sis_map ( + student_guid TEXT PRIMARY KEY, + sis_id TEXT NOT NULL +); +``` + +A seed script picks ~20 random GUIDs from `student_level_with_predictions` and assigns fake SIS IDs (`BSC-100001` through `BSC-100020`). This demonstrates both the happy path (button works) and the fallback (no mapping → disabled button with tooltip). + +## 2. 
Environment Configuration + +Two server-only env vars in `.env.local` (no `NEXT_PUBLIC_` prefix): + +```env +# SIS deep-link (leave blank to hide the button entirely) +SIS_BASE_URL=https://sis-demo.example.com/students +# Query param name the SIS expects (default: id) +SIS_ID_PARAM=id +``` + +When `SIS_BASE_URL` is unset, the API returns 404 and the UI hides the button — the feature is effectively disabled. + +## 3. API Route — `GET /api/students/[guid]/sis-link` + +**File:** `codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts` + +**Behavior:** + +| Condition | Response | +|-----------|----------| +| `SIS_BASE_URL` unset | 404 `{ url: null }` | +| Role not in `admin, advisor, ir` | 403 `{ error: "Forbidden" }` | +| No mapping in `guid_sis_map` | 404 `{ url: null }` | +| Mapping found | 200 `{ url: "https://sis-demo.example.com/students?id=BSC-100001" }` | + +**Role gating:** Reads `x-user-role` header injected by existing middleware. No changes to `lib/roles.ts` needed — the `/api/students` prefix is already gated to `admin`, `advisor`, `ir`. + +**Audit logging:** Appends to `logs/query-history.jsonl`: + +```json +{ "event": "sis_link_accessed", "guid": "", "role": "advisor", "timestamp": "2026-04-01T12:00:00.000Z" } +``` + +The `sis_id` is never logged. + +## 4. UI — "Open in SIS" Button + +**File:** `codebenders-dashboard/app/students/[guid]/page.tsx` + +**Placement:** In the student header card, alongside the existing alert/readiness badges. + +**Visibility logic** (determined by the API response on page load): + +| API result | Button state | +|------------|--------------| +| 200 with URL | Visible and clickable — opens URL in new tab | +| 404 (no mapping) | Visible but disabled — tooltip: "No SIS record linked for this student" | +| 403 or fetch error | Hidden entirely | + +Uses existing `Button` from shadcn/ui and `ExternalLink` icon from lucide-react. No new component file needed. 
+ +## Files Changed + +| File | Change | +|------|--------| +| `operations/seed_guid_sis_map.py` | New — seed script for demo data | +| `codebenders-dashboard/.env.local` | Add `SIS_BASE_URL`, `SIS_ID_PARAM` | +| `codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts` | New — server-side SIS URL builder | +| `codebenders-dashboard/app/students/[guid]/page.tsx` | Add "Open in SIS" button with fetch logic | + +## Out of Scope + +- Row-Level Security on `guid_sis_map` (not needed for POC) +- Real institution SIS integration (demo uses placeholder URL) +- `.env.example` file (can be added later) +- Supabase Edge Function alternative + +## Acceptance Criteria (from issue #78) + +- [x] `SIS_BASE_URL` env var controls whether the button appears (hidden when blank) +- [x] Button only visible to Advisor + IR + Admin roles +- [x] SIS ID is never stored in `student_level_with_predictions` or `llm_recommendations` +- [x] SIS ID is never returned by any public API endpoint (only pre-built URL returned) +- [x] Every deep-link access is logged (GUID + role, not SIS ID) +- [x] Button shows a graceful fallback if no mapping exists for a GUID +- [x] Works with any SIS that accepts a query-param student ID in a URL From 8b89b7fa5472124e2c1a5ec322b78546d9ec64d9 Mon Sep 17 00:00:00 2001 From: William Hill Date: Wed, 1 Apr 2026 19:36:55 -0400 Subject: [PATCH 06/15] docs: add implementation plan for SIS deep-link feature (#78) --- .../plans/2026-04-01-sis-deep-link.md | 455 ++++++++++++++++++ 1 file changed, 455 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-01-sis-deep-link.md diff --git a/docs/superpowers/plans/2026-04-01-sis-deep-link.md b/docs/superpowers/plans/2026-04-01-sis-deep-link.md new file mode 100644 index 0000000..128856b --- /dev/null +++ b/docs/superpowers/plans/2026-04-01-sis-deep-link.md @@ -0,0 +1,455 @@ +# SIS Deep-Link Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development 
(recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a FERPA-compliant "Open in SIS" button to the student detail page that constructs a deep-link URL server-side, keeping the SIS student ID out of the browser. + +**Architecture:** A new `guid_sis_map` table maps anonymized GUIDs to SIS IDs. A new API route (`GET /api/students/[guid]/sis-link`) performs the lookup, builds the URL server-side, logs access, and returns only the constructed URL. The student detail page fetches this endpoint and renders the button accordingly. + +**Tech Stack:** Next.js 16 (App Router), PostgreSQL (pg driver), shadcn/ui, Tailwind CSS, Python (psycopg2 for seed script) + +--- + +## File Structure + +| File | Action | Responsibility | +|------|--------|----------------| +| `operations/seed_guid_sis_map.py` | Create | Create table + seed ~20 demo mappings | +| `codebenders-dashboard/.env.local` | Modify | Add `SIS_BASE_URL`, `SIS_ID_PARAM` | +| `codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts` | Create | Server-side SIS URL builder + audit log | +| `codebenders-dashboard/app/students/[guid]/page.tsx` | Modify | Add "Open in SIS" button | + +--- + +### Task 1: Create `guid_sis_map` Table and Seed Demo Data + +**Files:** +- Create: `operations/seed_guid_sis_map.py` + +- [ ] **Step 1: Write the seed script** + +Create `operations/seed_guid_sis_map.py` using the existing `db_config` and `psycopg2` pattern from `operations/db_utils.py`: + +```python +""" +Seed guid_sis_map Table +======================== +Creates the guid_sis_map table and populates it with ~20 demo mappings +for POC/demo purposes. Maps real Student_GUIDs to fake SIS IDs. 
+""" + +import psycopg2 +from psycopg2.extras import RealDictCursor +from .db_config import DB_CONFIG + + +def seed_guid_sis_map(): + """Create guid_sis_map table and seed with demo data.""" + connection = psycopg2.connect( + host=DB_CONFIG['host'], + user=DB_CONFIG['user'], + password=DB_CONFIG['password'], + dbname=DB_CONFIG['database'], + port=DB_CONFIG['port'], + cursor_factory=RealDictCursor + ) + cursor = connection.cursor() + + # Create table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS guid_sis_map ( + student_guid TEXT PRIMARY KEY, + sis_id TEXT NOT NULL + ); + """) + print("✓ guid_sis_map table created/verified") + + # Pick ~20 random GUIDs from student_level_with_predictions + cursor.execute(""" + SELECT "Student_GUID" + FROM student_level_with_predictions + ORDER BY RANDOM() + LIMIT 20 + """) + guids = [row['Student_GUID'] for row in cursor.fetchall()] + + if not guids: + print("✗ No students found in student_level_with_predictions") + cursor.close() + connection.close() + return False + + # Clear existing demo data and insert fresh mappings + cursor.execute("DELETE FROM guid_sis_map") + + for i, guid in enumerate(guids, start=100001): + sis_id = f"BSC-{i}" + cursor.execute( + "INSERT INTO guid_sis_map (student_guid, sis_id) VALUES (%s, %s)", + (guid, sis_id) + ) + + connection.commit() + print(f"✓ Seeded {len(guids)} GUID → SIS ID mappings (BSC-100001 .. 
BSC-{100000 + len(guids)})") + + # Verify + cursor.execute("SELECT COUNT(*) AS count FROM guid_sis_map") + count = cursor.fetchone()['count'] + print(f"✓ Verified: {count} records in guid_sis_map") + + cursor.close() + connection.close() + return True + + +if __name__ == "__main__": + print("=" * 60) + print("SEEDING guid_sis_map TABLE") + print("=" * 60) + seed_guid_sis_map() +``` + +- [ ] **Step 2: Run the seed script** + +Run: +```bash +cd /Users/william-meroxa/Development/codebenders-datathon +source venv/bin/activate +python -m operations.seed_guid_sis_map +``` + +Expected output: +``` +============================================================ +SEEDING guid_sis_map TABLE +============================================================ +✓ Connected to database: postgres +✓ guid_sis_map table created/verified +✓ Seeded 20 GUID → SIS ID mappings (BSC-100001 .. BSC-100020) +✓ Verified: 20 records in guid_sis_map +``` + +- [ ] **Step 3: Commit** + +```bash +git add operations/seed_guid_sis_map.py +git commit -m "feat(#78): add guid_sis_map seed script for SIS deep-link POC" +``` + +--- + +### Task 2: Add Environment Variables + +**Files:** +- Modify: `codebenders-dashboard/.env.local` (append at end) + +- [ ] **Step 1: Add SIS env vars to `.env.local`** + +Append to the end of `codebenders-dashboard/.env.local`: + +```env + +# SIS Deep-Link Configuration (leave SIS_BASE_URL blank to disable) +SIS_BASE_URL=https://sis-demo.example.com/students +SIS_ID_PARAM=id +``` + +- [ ] **Step 2: Commit** + +```bash +cd /Users/william-meroxa/Development/codebenders-datathon +git add codebenders-dashboard/.env.local +git commit -m "feat(#78): add SIS deep-link env vars" +``` + +Note: `.env.local` is already gitignored. If it is, skip the commit for this file — the env vars are documented in the design spec and the API route defaults `SIS_ID_PARAM` to `"id"`. 
+ +--- + +### Task 3: Create SIS Link API Route + +**Files:** +- Create: `codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts` + +- [ ] **Step 1: Create the API route** + +Create `codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts`: + +```typescript +import { type NextRequest, NextResponse } from "next/server" +import { mkdir, appendFile } from "fs/promises" +import path from "path" +import { getPool } from "@/lib/db" +import type { Role } from "@/lib/roles" + +const ALLOWED_ROLES: Role[] = ["admin", "advisor", "ir"] + +const LOGS_DIR = path.join(process.cwd(), "logs") +const LOG_FILE = path.join(LOGS_DIR, "query-history.jsonl") + +export async function GET( + request: NextRequest, + { params }: { params: Promise<{ guid: string }> } +) { + // Feature disabled if SIS_BASE_URL is not configured + const sisBaseUrl = process.env.SIS_BASE_URL + if (!sisBaseUrl) { + return NextResponse.json({ url: null }, { status: 404 }) + } + + // Role check + const role = request.headers.get("x-user-role") as Role | null + if (!role || !ALLOWED_ROLES.includes(role)) { + return NextResponse.json({ error: "Forbidden" }, { status: 403 }) + } + + const { guid } = await params + if (!guid) { + return NextResponse.json({ error: "Missing student GUID" }, { status: 400 }) + } + + try { + // Look up SIS ID from mapping table + const pool = getPool() + const result = await pool.query( + "SELECT sis_id FROM guid_sis_map WHERE student_guid = $1 LIMIT 1", + [guid] + ) + + if (result.rows.length === 0) { + return NextResponse.json({ url: null }, { status: 404 }) + } + + // Build URL server-side — SIS ID never reaches the client + const sisIdParam = process.env.SIS_ID_PARAM || "id" + const sisId = result.rows[0].sis_id + const url = `${sisBaseUrl}?${encodeURIComponent(sisIdParam)}=${encodeURIComponent(sisId)}` + + // Audit log — GUID and role only, never the SIS ID + const logEntry = { + event: "sis_link_accessed", + guid, + role, + timestamp: new Date().toISOString(), + } + 
await mkdir(LOGS_DIR, { recursive: true }) + await appendFile(LOG_FILE, JSON.stringify(logEntry) + "\n", "utf8") + + return NextResponse.json({ url }) + } catch (error) { + console.error("SIS link lookup error:", error) + return NextResponse.json( + { error: "Failed to look up SIS link" }, + { status: 500 } + ) + } +} +``` + +- [ ] **Step 2: Verify the route loads** + +Start the dev server and test the endpoint: + +```bash +cd /Users/william-meroxa/Development/codebenders-datathon/codebenders-dashboard +npm run dev +``` + +Then in another terminal, test with curl (this will return 403 without auth headers, which confirms the route loads and the role check works): + +```bash +curl -s http://localhost:3000/api/students/test-guid/sis-link | jq . +``` + +Expected: `{ "error": "Forbidden" }` with status 403. + +- [ ] **Step 3: Commit** + +```bash +cd /Users/william-meroxa/Development/codebenders-datathon +git add codebenders-dashboard/app/api/students/\[guid\]/sis-link/route.ts +git commit -m "feat(#78): add SIS deep-link API route with audit logging" +``` + +--- + +### Task 4: Add "Open in SIS" Button to Student Detail Page + +**Files:** +- Modify: `codebenders-dashboard/app/students/[guid]/page.tsx` + +- [ ] **Step 1: Add SIS link state and fetch logic** + +In `codebenders-dashboard/app/students/[guid]/page.tsx`, add imports for `ExternalLink` at the top alongside the existing lucide-react imports: + +```typescript +import { ArrowLeft, ExternalLink, ShieldCheck } from "lucide-react" +``` + +Add a `Tooltip` import from shadcn/ui (if available) or we'll use the `title` attribute for the POC. 
+ +Add new state variables inside the `StudentDetailPage` component, after the existing `error` state: + +```typescript +const [sisLink, setSisLink] = useState(null) +const [sisStatus, setSisStatus] = useState<"loading" | "available" | "unavailable" | "hidden">("loading") +``` + +Add a second `useEffect` after the existing one that fetches student data, to fetch the SIS link: + +```typescript +useEffect(() => { + if (!guid) return + fetch(`/api/students/${encodeURIComponent(guid)}/sis-link`) + .then(r => { + if (r.status === 403) { + setSisStatus("hidden") + return null + } + if (r.status === 404) { + setSisStatus("unavailable") + return null + } + if (!r.ok) { + setSisStatus("hidden") + return null + } + return r.json() + }) + .then(data => { + if (data?.url) { + setSisLink(data.url) + setSisStatus("available") + } + }) + .catch(() => setSisStatus("hidden")) +}, [guid]) +``` + +- [ ] **Step 2: Add the button to the student header** + +In the same file, find the badges `
` in the student header (the `
` that contains the alert and readiness badges). Add the SIS button before the badges: + +Replace this block (around line 181): +```tsx +
+ {student.at_risk_alert && ( +``` + +With: +```tsx +
+ {sisStatus === "available" && sisLink && ( + + )} + {sisStatus === "unavailable" && ( + + )} + {student.at_risk_alert && ( +``` + +- [ ] **Step 3: Verify in the browser** + +1. Start the dev server: `npm run dev` +2. Navigate to a student detail page for a GUID that was seeded in `guid_sis_map` +3. Verify the "Open in SIS" button appears and clicking it opens the demo URL in a new tab +4. Navigate to a student NOT in `guid_sis_map` +5. Verify the button appears disabled with the tooltip text + +To find a seeded GUID for testing: +```bash +cd /Users/william-meroxa/Development/codebenders-datathon +source venv/bin/activate +python -c " +import psycopg2 +from operations.db_config import DB_CONFIG +conn = psycopg2.connect(**DB_CONFIG) +cur = conn.cursor() +cur.execute('SELECT student_guid FROM guid_sis_map LIMIT 3') +for row in cur.fetchall(): + print(row[0]) +cur.close() +conn.close() +" +``` + +- [ ] **Step 4: Verify audit log entry** + +After clicking the button, check that an audit entry was written: + +```bash +tail -1 codebenders-dashboard/logs/query-history.jsonl +``` + +Expected: a JSON line like: +```json +{"event":"sis_link_accessed","guid":"","role":"advisor","timestamp":"2026-04-01T..."} +``` + +- [ ] **Step 5: Commit** + +```bash +cd /Users/william-meroxa/Development/codebenders-datathon +git add codebenders-dashboard/app/students/\[guid\]/page.tsx +git commit -m "feat(#78): add Open in SIS button to student detail page" +``` + +--- + +### Task 5: Final Verification + +- [ ] **Step 1: Run lint** + +```bash +cd /Users/william-meroxa/Development/codebenders-datathon/codebenders-dashboard +npm run lint +``` + +Expected: no new warnings or errors. + +- [ ] **Step 2: Run build** + +```bash +cd /Users/william-meroxa/Development/codebenders-datathon/codebenders-dashboard +npm run build +``` + +Expected: build succeeds with no type errors. 
+ +- [ ] **Step 3: End-to-end walkthrough** + +Verify all acceptance criteria from issue #78: + +| Criterion | How to verify | +|-----------|---------------| +| `SIS_BASE_URL` controls button | Remove the env var, restart dev server, confirm button is hidden | +| Button only for admin/advisor/ir | Log in as a leadership/faculty user, confirm button is hidden | +| SIS ID never in public API | Check Network tab — `/sis-link` returns `{ url }` only, not the raw SIS ID | +| SIS ID never in student data | Check `/api/students/[guid]` response — no `sis_id` field | +| Deep-link access logged | Check `logs/query-history.jsonl` for `sis_link_accessed` entries | +| Graceful fallback | Visit a student without a mapping — disabled button with tooltip | +| Works with any SIS URL | Change `SIS_BASE_URL` in env, verify the URL changes | + +- [ ] **Step 4: Final commit (if any lint/build fixes needed)** + +```bash +git add -p +git commit -m "fix(#78): address lint/build issues" +``` From 7e2bd031292d03b66cb2079ec05ae8d71112ae5c Mon Sep 17 00:00:00 2001 From: William Hill Date: Wed, 1 Apr 2026 19:46:52 -0400 Subject: [PATCH 07/15] feat(#78): add guid_sis_map seed script for SIS deep-link POC --- operations/seed_guid_sis_map.py | 76 +++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 operations/seed_guid_sis_map.py diff --git a/operations/seed_guid_sis_map.py b/operations/seed_guid_sis_map.py new file mode 100644 index 0000000..a544757 --- /dev/null +++ b/operations/seed_guid_sis_map.py @@ -0,0 +1,76 @@ +""" +Seed guid_sis_map Table +======================== +Creates the guid_sis_map table and populates it with ~20 demo mappings +for POC/demo purposes. Maps real Student_GUIDs to fake SIS IDs. 
+""" + +import psycopg2 +from psycopg2.extras import RealDictCursor +from .db_config import DB_CONFIG + + +def seed_guid_sis_map(): + """Create guid_sis_map table and seed with demo data.""" + connection = psycopg2.connect( + host=DB_CONFIG['host'], + user=DB_CONFIG['user'], + password=DB_CONFIG['password'], + dbname=DB_CONFIG['database'], + port=DB_CONFIG['port'], + cursor_factory=RealDictCursor + ) + cursor = connection.cursor() + + # Create table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS guid_sis_map ( + student_guid TEXT PRIMARY KEY, + sis_id TEXT NOT NULL + ); + """) + print("✓ guid_sis_map table created/verified") + + # Pick ~20 random GUIDs from student_level_with_predictions + cursor.execute(""" + SELECT "Student_GUID" + FROM student_level_with_predictions + ORDER BY RANDOM() + LIMIT 20 + """) + guids = [row['Student_GUID'] for row in cursor.fetchall()] + + if not guids: + print("✗ No students found in student_level_with_predictions") + cursor.close() + connection.close() + return False + + # Clear existing demo data and insert fresh mappings + cursor.execute("DELETE FROM guid_sis_map") + + for i, guid in enumerate(guids, start=100001): + sis_id = f"BSC-{i}" + cursor.execute( + "INSERT INTO guid_sis_map (student_guid, sis_id) VALUES (%s, %s)", + (guid, sis_id) + ) + + connection.commit() + print(f"✓ Seeded {len(guids)} GUID → SIS ID mappings (BSC-100001 .. 
BSC-{100000 + len(guids)})") + + # Verify + cursor.execute("SELECT COUNT(*) AS count FROM guid_sis_map") + count = cursor.fetchone()['count'] + print(f"✓ Verified: {count} records in guid_sis_map") + + cursor.close() + connection.close() + return True + + +if __name__ == "__main__": + print("=" * 60) + print("SEEDING guid_sis_map TABLE") + print("=" * 60) + seed_guid_sis_map() From 2e7b9e0df5f97514aaa37f2510b732f77c95775f Mon Sep 17 00:00:00 2001 From: William Hill Date: Wed, 1 Apr 2026 19:49:53 -0400 Subject: [PATCH 08/15] fix(#78): use get_connection helper and add error handling to seed script --- operations/seed_guid_sis_map.py | 95 ++++++++++++++++----------------- 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/operations/seed_guid_sis_map.py b/operations/seed_guid_sis_map.py index a544757..c15c2c6 100644 --- a/operations/seed_guid_sis_map.py +++ b/operations/seed_guid_sis_map.py @@ -5,68 +5,65 @@ for POC/demo purposes. Maps real Student_GUIDs to fake SIS IDs. """ -import psycopg2 -from psycopg2.extras import RealDictCursor -from .db_config import DB_CONFIG +from .db_utils import get_connection def seed_guid_sis_map(): """Create guid_sis_map table and seed with demo data.""" - connection = psycopg2.connect( - host=DB_CONFIG['host'], - user=DB_CONFIG['user'], - password=DB_CONFIG['password'], - dbname=DB_CONFIG['database'], - port=DB_CONFIG['port'], - cursor_factory=RealDictCursor - ) + connection = get_connection() cursor = connection.cursor() - # Create table - cursor.execute(""" - CREATE TABLE IF NOT EXISTS guid_sis_map ( - student_guid TEXT PRIMARY KEY, - sis_id TEXT NOT NULL - ); - """) - print("✓ guid_sis_map table created/verified") + try: + # Create table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS guid_sis_map ( + student_guid TEXT PRIMARY KEY, + sis_id TEXT NOT NULL + ); + """) + print("✓ guid_sis_map table created/verified") - # Pick ~20 random GUIDs from student_level_with_predictions - cursor.execute(""" - SELECT 
"Student_GUID" - FROM student_level_with_predictions - ORDER BY RANDOM() - LIMIT 20 - """) - guids = [row['Student_GUID'] for row in cursor.fetchall()] + # Pick ~20 random GUIDs from student_level_with_predictions + cursor.execute(""" + SELECT "Student_GUID" + FROM student_level_with_predictions + ORDER BY RANDOM() + LIMIT 20 + """) + guids = [row['Student_GUID'] for row in cursor.fetchall()] - if not guids: - print("✗ No students found in student_level_with_predictions") - cursor.close() - connection.close() - return False + if not guids: + print("✗ No students found in student_level_with_predictions") + return False + + # Clear existing demo data and insert fresh mappings + cursor.execute("DELETE FROM guid_sis_map") - # Clear existing demo data and insert fresh mappings - cursor.execute("DELETE FROM guid_sis_map") + for i, guid in enumerate(guids, start=100001): + sis_id = f"BSC-{i}" + cursor.execute( + "INSERT INTO guid_sis_map (student_guid, sis_id) VALUES (%s, %s)", + (guid, sis_id) + ) - for i, guid in enumerate(guids, start=100001): - sis_id = f"BSC-{i}" - cursor.execute( - "INSERT INTO guid_sis_map (student_guid, sis_id) VALUES (%s, %s)", - (guid, sis_id) - ) + connection.commit() + print(f"✓ Seeded {len(guids)} GUID → SIS ID mappings (BSC-100001 .. BSC-{100000 + len(guids)})") - connection.commit() - print(f"✓ Seeded {len(guids)} GUID → SIS ID mappings (BSC-100001 .. 
BSC-{100000 + len(guids)})") + # Verify + cursor.execute("SELECT COUNT(*) AS count FROM guid_sis_map") + count = cursor.fetchone()['count'] + print(f"✓ Verified: {count} records in guid_sis_map") - # Verify - cursor.execute("SELECT COUNT(*) AS count FROM guid_sis_map") - count = cursor.fetchone()['count'] - print(f"✓ Verified: {count} records in guid_sis_map") + return True - cursor.close() - connection.close() - return True + except Exception as e: + connection.rollback() + print(f"✗ Failed to seed guid_sis_map: {e}") + return False + + finally: + cursor.close() + connection.close() if __name__ == "__main__": From b70cdda96acbf85071ede663339b4c3c444a7df0 Mon Sep 17 00:00:00 2001 From: William Hill Date: Wed, 1 Apr 2026 20:02:42 -0400 Subject: [PATCH 09/15] feat(#78): add SIS deep-link API route with audit logging --- .../app/api/students/[guid]/sis-link/route.ts | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts diff --git a/codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts b/codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts new file mode 100644 index 0000000..c01091d --- /dev/null +++ b/codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts @@ -0,0 +1,68 @@ +import { type NextRequest, NextResponse } from "next/server" +import { mkdir, appendFile } from "fs/promises" +import path from "path" +import { getPool } from "@/lib/db" +import type { Role } from "@/lib/roles" + +const ALLOWED_ROLES: Role[] = ["admin", "advisor", "ir"] + +const LOGS_DIR = path.join(process.cwd(), "logs") +const LOG_FILE = path.join(LOGS_DIR, "query-history.jsonl") + +export async function GET( + request: NextRequest, + { params }: { params: Promise<{ guid: string }> } +) { + // Feature disabled if SIS_BASE_URL is not configured + const sisBaseUrl = process.env.SIS_BASE_URL + if (!sisBaseUrl) { + return NextResponse.json({ url: null }, { status: 404 }) + } + + // 
Role check + const role = request.headers.get("x-user-role") as Role | null + if (!role || !ALLOWED_ROLES.includes(role)) { + return NextResponse.json({ error: "Forbidden" }, { status: 403 }) + } + + const { guid } = await params + if (!guid) { + return NextResponse.json({ error: "Missing student GUID" }, { status: 400 }) + } + + try { + // Look up SIS ID from mapping table + const pool = getPool() + const result = await pool.query( + "SELECT sis_id FROM guid_sis_map WHERE student_guid = $1 LIMIT 1", + [guid] + ) + + if (result.rows.length === 0) { + return NextResponse.json({ url: null }, { status: 404 }) + } + + // Build URL server-side — SIS ID never reaches the client + const sisIdParam = process.env.SIS_ID_PARAM || "id" + const sisId = result.rows[0].sis_id + const url = `${sisBaseUrl}?${encodeURIComponent(sisIdParam)}=${encodeURIComponent(sisId)}` + + // Audit log — GUID and role only, never the SIS ID + const logEntry = { + event: "sis_link_accessed", + guid, + role, + timestamp: new Date().toISOString(), + } + await mkdir(LOGS_DIR, { recursive: true }) + await appendFile(LOG_FILE, JSON.stringify(logEntry) + "\n", "utf8") + + return NextResponse.json({ url }) + } catch (error) { + console.error("SIS link lookup error:", error) + return NextResponse.json( + { error: "Failed to look up SIS link" }, + { status: 500 } + ) + } +} From c06d4f82e55dd8bb082be5c9ee85d8a2b897ac19 Mon Sep 17 00:00:00 2001 From: William Hill Date: Wed, 1 Apr 2026 20:06:09 -0400 Subject: [PATCH 10/15] fix(#78): use canAccess pattern and isolate audit log writes --- .../app/api/students/[guid]/sis-link/route.ts | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts b/codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts index c01091d..c389cde 100644 --- a/codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts +++ 
b/codebenders-dashboard/app/api/students/[guid]/sis-link/route.ts @@ -2,9 +2,7 @@ import { type NextRequest, NextResponse } from "next/server" import { mkdir, appendFile } from "fs/promises" import path from "path" import { getPool } from "@/lib/db" -import type { Role } from "@/lib/roles" - -const ALLOWED_ROLES: Role[] = ["admin", "advisor", "ir"] +import { canAccess, type Role } from "@/lib/roles" const LOGS_DIR = path.join(process.cwd(), "logs") const LOG_FILE = path.join(LOGS_DIR, "query-history.jsonl") @@ -21,7 +19,7 @@ export async function GET( // Role check const role = request.headers.get("x-user-role") as Role | null - if (!role || !ALLOWED_ROLES.includes(role)) { + if (!role || !canAccess("/api/students", role)) { return NextResponse.json({ error: "Forbidden" }, { status: 403 }) } @@ -30,6 +28,8 @@ export async function GET( return NextResponse.json({ error: "Missing student GUID" }, { status: 400 }) } + let url: string + try { // Look up SIS ID from mapping table const pool = getPool() @@ -45,19 +45,7 @@ export async function GET( // Build URL server-side — SIS ID never reaches the client const sisIdParam = process.env.SIS_ID_PARAM || "id" const sisId = result.rows[0].sis_id - const url = `${sisBaseUrl}?${encodeURIComponent(sisIdParam)}=${encodeURIComponent(sisId)}` - - // Audit log — GUID and role only, never the SIS ID - const logEntry = { - event: "sis_link_accessed", - guid, - role, - timestamp: new Date().toISOString(), - } - await mkdir(LOGS_DIR, { recursive: true }) - await appendFile(LOG_FILE, JSON.stringify(logEntry) + "\n", "utf8") - - return NextResponse.json({ url }) + url = `${sisBaseUrl}?${encodeURIComponent(sisIdParam)}=${encodeURIComponent(sisId)}` } catch (error) { console.error("SIS link lookup error:", error) return NextResponse.json( @@ -65,4 +53,20 @@ export async function GET( { status: 500 } ) } + + // Audit log — GUID and role only, never the SIS ID + const logEntry = { + event: "sis_link_accessed", + guid, + role, + timestamp: 
new Date().toISOString(), + } + try { + await mkdir(LOGS_DIR, { recursive: true }) + await appendFile(LOG_FILE, JSON.stringify(logEntry) + "\n", "utf8") + } catch (auditErr) { + console.error("SIS audit log write failed:", auditErr) + } + + return NextResponse.json({ url }) } From f7cd37e2fd819095cc80c1aa36691a9b54f584f7 Mon Sep 17 00:00:00 2001 From: William Hill Date: Wed, 1 Apr 2026 20:07:23 -0400 Subject: [PATCH 11/15] feat(#78): add Open in SIS button to student detail page --- .../app/students/[guid]/page.tsx | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/codebenders-dashboard/app/students/[guid]/page.tsx b/codebenders-dashboard/app/students/[guid]/page.tsx index 90d3a10..b4f7e7f 100644 --- a/codebenders-dashboard/app/students/[guid]/page.tsx +++ b/codebenders-dashboard/app/students/[guid]/page.tsx @@ -2,7 +2,7 @@ import { useEffect, useState } from "react" import { useParams, useRouter } from "next/navigation" -import { ArrowLeft, ShieldCheck } from "lucide-react" +import { ArrowLeft, ExternalLink, ShieldCheck } from "lucide-react" import { Button } from "@/components/ui/button" import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card" @@ -88,6 +88,8 @@ export default function StudentDetailPage() { const [student, setStudent] = useState(null) const [loading, setLoading] = useState(true) const [error, setError] = useState(null) + const [sisLink, setSisLink] = useState(null) + const [sisStatus, setSisStatus] = useState<"loading" | "available" | "unavailable" | "hidden">("loading") useEffect(() => { if (!guid) return @@ -102,6 +104,33 @@ export default function StudentDetailPage() { .catch(e => { setError(e.message); setLoading(false) }) }, [guid]) + useEffect(() => { + if (!guid) return + fetch(`/api/students/${encodeURIComponent(guid)}/sis-link`) + .then(r => { + if (r.status === 403) { + setSisStatus("hidden") + return null + } + if (r.status === 404) { + setSisStatus("unavailable") + return null + 
} + if (!r.ok) { + setSisStatus("hidden") + return null + } + return r.json() + }) + .then(data => { + if (data?.url) { + setSisLink(data.url) + setSisStatus("available") + } + }) + .catch(() => setSisStatus("hidden")) + }, [guid]) + // ─── Loading skeleton ──────────────────────────────────────────────────── if (loading) { @@ -179,6 +208,29 @@ export default function StudentDetailPage() {
+ {sisStatus === "available" && sisLink && ( + + )} + {sisStatus === "unavailable" && ( + + )} {student.at_risk_alert && ( Date: Wed, 1 Apr 2026 20:17:15 -0400 Subject: [PATCH 12/15] fix(#78): handle loading state, null URL edge case, and button styling --- codebenders-dashboard/app/students/[guid]/page.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/codebenders-dashboard/app/students/[guid]/page.tsx b/codebenders-dashboard/app/students/[guid]/page.tsx index b4f7e7f..98a1527 100644 --- a/codebenders-dashboard/app/students/[guid]/page.tsx +++ b/codebenders-dashboard/app/students/[guid]/page.tsx @@ -126,6 +126,8 @@ export default function StudentDetailPage() { if (data?.url) { setSisLink(data.url) setSisStatus("available") + } else if (data !== null) { + setSisStatus("unavailable") } }) .catch(() => setSisStatus("hidden")) @@ -208,6 +210,9 @@ export default function StudentDetailPage() {
+ {sisStatus === "loading" && ( +
+ )} {sisStatus === "available" && sisLink && ( - )} - {sisStatus === "unavailable" && ( -