diff --git a/.gitignore b/.gitignore index 7f3f076b..0f09637d 100644 --- a/.gitignore +++ b/.gitignore @@ -21,5 +21,15 @@ # # dist build output target/ +/server/bindings/ + +# Runtime capture configuration and local fallback capture logs. +/capture_config.json +/capture-events-fallback.jsonl +/capture-metrics-*.csv +/capture-analysis-*/ +/server/scripts/output +/server/scripts/capture-metrics-*.csv +/server/scripts/capture-analysis-*/ # CodeChat Editor lexer: python. See TODO. diff --git a/capture_config.example.json b/capture_config.example.json new file mode 100644 index 00000000..6a2e24b8 --- /dev/null +++ b/capture_config.example.json @@ -0,0 +1,9 @@ +{ + "host": "your-aws-rds-endpoint.amazonaws.com", + "port": 5432, + "user": "codechat_capture_writer", + "password": "your-db-password", + "dbname": "your-db-name", + "app_id": "dissertation", + "fallback_path": "capture-events-fallback.jsonl" +} diff --git a/client/src/CodeChatEditor.mts b/client/src/CodeChatEditor.mts index 672b8b6c..3df3bb86 100644 --- a/client/src/CodeChatEditor.mts +++ b/client/src/CodeChatEditor.mts @@ -693,8 +693,15 @@ export const on_error = (event: Event) => { let err_str: string; if (event instanceof ErrorEvent) { err_str = `${event.filename}:${event.lineno}: ${event.message}`; + if (event.error?.stack) { + err_str += `\n${event.error.stack}`; + } } else if (event instanceof PromiseRejectionEvent) { - err_str = `${event.promise} rejected: ${event.reason}`; + const reason = event.reason; + err_str = `${event.promise} rejected: ${reason}`; + if (reason instanceof Error && reason.stack) { + err_str += `\n${reason.stack}`; + } } else { err_str = `Unexpected error ${typeof event}: ${event}`; } diff --git a/client/src/shared.mts b/client/src/shared.mts index d3550b0f..caadb971 100644 --- a/client/src/shared.mts +++ b/client/src/shared.mts @@ -43,6 +43,8 @@ import { CodeMirrorDocBlockTuple } from "./rust-types/CodeMirrorDocBlockTuple.js import { UpdateMessageContents } from "./rust-types/UpdateMessageContents.js"; import { ResultOkTypes } from "./rust-types/ResultOkTypes.js"; import { ResultErrTypes } from "./rust-types/ResultErrTypes.js"; +import { CaptureEventWire } from "./rust-types/CaptureEventWire.js"; +import { CaptureStatus } from "./rust-types/CaptureStatus.js"; // Manually define this, since `ts-rs` can't export `webserver.MessageResult`. type MessageResult = { Ok: ResultOkTypes } | { Err: ResultErrTypes }; @@ -55,6 +57,8 @@ export type { CodeMirror, CodeMirrorDiffable, CodeMirrorDocBlockTuple, + CaptureEventWire, + CaptureStatus, CodeChatForWeb, EditorMessage, EditorMessageContents, diff --git a/extensions/VSCode/.gitignore b/extensions/VSCode/.gitignore index 8c5160c5..3780ba9f 100644 --- a/extensions/VSCode/.gitignore +++ b/extensions/VSCode/.gitignore @@ -33,5 +33,5 @@ src/index.d.ts src/index.js src/codechat-editor-client.win32-x64-msvc.node .windows/ - +*.log # CodeChat Editor lexer: python. See TODO. diff --git a/extensions/VSCode/package.json b/extensions/VSCode/package.json index c997ae3c..9b80fd3c 100644 --- a/extensions/VSCode/package.json +++ b/extensions/VSCode/package.json @@ -41,11 +41,7 @@ "type": "git", "url": "https://github.com/bjones1/CodeChat_Editor" }, - "version": "0.1.55", - "activationEvents": [ - "onCommand:extension.codeChatEditorActivate", - "onCommand:extension.codeChatEditorDeactivate" - ], + "version": "0.1.54-beta1", "contributes": { "configuration": { "title": "CodeChat Editor", @@ -62,6 +58,21 @@ "In the default external web browser" ], "markdownDescription": "Select the location of the CodeChat Editor Client. After changing this value, you **must** close then restart the CodeChat Editor extension." + }, + "CodeChatEditor.Capture.RecordStudyEvents": { + "type": "boolean", + "default": false, + "markdownDescription": "Record CodeChat dissertation capture events. This defaults to off. Consent is recorded through **Manage CodeChat Editor Capture** and also defaults to off.\n\n| Consent recorded | Record study events | What happens |\n| --- | --- | --- |\n| Off | Off | Capture is off. |\n| On | Off | Consent is retained, but recording is paused. |\n| On | On | Capture records study events. |\n| Off | On | Capture waits for consent before recording. |" + }, + "CodeChatEditor.Capture.ConsentEnabled": { + "type": "boolean", + "default": false, + "markdownDescription": "Record that participant consent has been given for CodeChat dissertation capture. This defaults to off and persists after setting." + }, + "CodeChatEditor.Capture.ParticipantId": { + "type": "string", + "default": "", + "markdownDescription": "Pseudonymous participant identifier used as the capture user_id. If left blank, CodeChat generates a UUID when the student gives consent." } } }, @@ -73,6 +84,14 @@ { "command": "extension.codeChatEditorDeactivate", "title": "Disable the CodeChat Editor" + }, + { + "command": "extension.codeChatCaptureStatus", + "title": "Manage CodeChat Editor Capture" + }, + { + "command": "extension.codeChatInsertReflectionPrompt", + "title": "CodeChat Editor: Insert Reflection Prompt" } ] }, diff --git a/extensions/VSCode/src/extension.ts b/extensions/VSCode/src/extension.ts index 9e003ea8..a7b9fa6b 100644 --- a/extensions/VSCode/src/extension.ts +++ b/extensions/VSCode/src/extension.ts @@ -3,7 +3,8 @@ // This file is part of the CodeChat Editor. The CodeChat Editor is free // software: you can redistribute it and/or modify it under the terms of the GNU // General Public License as published by the Free Software Foundation, either -// version 3 of the License, or (at your option) any later version. +// version 3 of the License, or (at your option) any later version of the GNU +// General Public License. // // The CodeChat Editor is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or @@ -40,6 +41,8 @@ import { CodeChatEditorServer, initServer } from "./index.js"; // ### Local packages import { auto_update_timeout_ms, + CaptureEventWire, + CaptureStatus, EditorMessage, EditorMessageContents, KeysOfRustEnum, @@ -53,6 +56,8 @@ import { } from "../../../client/src/debug_enabled.mjs"; import { ResultErrTypes } from "../../../client/src/rust-types/ResultErrTypes.js"; +import * as crypto from "crypto"; + // Globals // ------- enum CodeChatEditorClientLocation { @@ -60,6 +65,9 @@ enum CodeChatEditorClientLocation { browser, } +// Create a unique session ID for logging +const CAPTURE_SESSION_ID = crypto.randomUUID(); + // True on Windows, false on OS X / Linux. const is_windows = process.platform === "win32"; @@ -111,13 +119,1068 @@ let codeChatEditorServer: CodeChatEditorServer | undefined; initServer(ext.extensionPath); } +// --- +// +// CAPTURE (Dissertation instrumentation) +// -------------------------------------- + +// Capture uses these helpers only for documentation-like files. Source files +// classify directly as code; Markdown/RST get a finer split so prose edits count +// as documentation activity while embedded snippets count as code activity. +function isInMarkdownCodeFence( + doc: vscode.TextDocument, + line: number, +): boolean { + // Very simple fence tracker: toggles when encountering ``` or ~~~ at + // start of line. Good enough for dissertation instrumentation; refine later + // if needed. + let inFence = false; + for (let i = 0; i <= line; i++) { + const t = doc.lineAt(i).text.trim(); + if (t.startsWith("```") || t.startsWith("~~~")) { + inFence = !inFence; + } + } + return inFence; +} + +function isInRstCodeBlock(doc: vscode.TextDocument, line: number): boolean { + // Heuristic: find the most recent ".. code-block::" (or "::") and see if + // we're in its indented region. This won’t be perfect, but it’s far better + // than file-level classification. + let blockLine = -1; + for (let i = line; i >= 0; i--) { + const t = doc.lineAt(i).text; + const tt = t.trim(); + if (tt.startsWith(".. code-block::") || tt === "::") { + blockLine = i; + break; + } + // If we hit a non-indented line after searching upward too far, keep + // going; rst blocks can be separated by blank lines. + } + if (blockLine < 0) return false; + + // RST code block content usually begins after optional blank line(s), + // indented. Determine whether current line is indented relative to block + // directive line. + const cur = doc.lineAt(line).text; + if (cur.trim().length === 0) return false; + + // If it's indented at least one space/tab, treat it as inside block. + return /^\s+/.test(cur); +} + +function classifyAtPosition( + doc: vscode.TextDocument, + pos: vscode.Position, +): ActivityKind { + // These helpers are only for documentation-like documents that may embed + // source snippets. Plain source files skip this branch and classify as + // code. + if (DOC_LANG_IDS.has(doc.languageId)) { + if (doc.languageId === "markdown") { + return isInMarkdownCodeFence(doc, pos.line) ? "code" : "doc"; + } + if (doc.languageId === "restructuredtext") { + return isInRstCodeBlock(doc, pos.line) ? "code" : "doc"; + } + // Other doc types: default to doc + return "doc"; + } + return "code"; +} + +// Event-specific payload attached to a capture event. Study metadata such as +// group, course, assignment, and condition is intentionally excluded from the +// student-facing capture settings; analysis can join those values later from a +// researcher-managed participant/date mapping. +type CaptureEventData = Record; + +// Event names are generated from the Rust `CaptureEventType` enum, keeping the +// extension and server in sync without re-declaring the string union here. +type CaptureEventType = CaptureEventWire["event_type"]; + +// Student-facing capture settings. The setup is intentionally small: students +// give consent, toggle capture, and receive or reuse a pseudonymous participant +// UUID. Assignment, course, group, and study-condition metadata are inferred +// during analysis from that participant ID and event timestamps. +interface StudySettings { + // True when the student wants capture enabled for the current work session. + enabled: boolean; + // True after the student has consented to study capture. + consentEnabled: boolean; + // Pseudonymous UUID used as the event user ID; generated when absent. + participantId: string; +} + +// Derived state for the two user-visible capture checkboxes. This mirrors the +// table shown in Settings and is the single source of truth for whether events +// may be recorded. +type CaptureSettingsState = + | "off" + | "paused" + | "recording" + | "waitingForConsent"; + +const CAPTURE_SCHEMA_VERSION = 2; +const CAPTURE_EVENT_SOURCE = "vscode_extension"; +// Settings contribution key for the user-facing recording checkbox. The +// shorter `Enabled` setting is deliberately no longer used because it was too +// ambiguous next to consent. +const CAPTURE_RECORD_SETTING_NAME = "RecordStudyEvents"; +const DEFAULT_REFLECTION_PROMPTS = [ + "What changed in your understanding of this code?", + "What assumption are you making, and how could you test it?", + "What would another developer need to know before maintaining this?", +]; + +// Output channel used for capture diagnostics that should not interrupt normal +// editor use. +let capture_output_channel: vscode.OutputChannel | undefined; +// True after the first failed send is logged to the console, suppressing repeat +// console warnings while still writing detailed failures to the output channel. +let captureFailureLogged = false; +// True once the CodeChat Client and Server have completed enough startup +// handshake work for capture events to be accepted. +let captureTransportReady = false; +// True after a capture-enabled extension session has emitted `session_start`. +let extensionCaptureSessionStarted = false; +// Monotonic per-extension event sequence number used to order events produced +// by this VS Code session. +let captureSequenceNumber = 0; +// Status bar item that reports capture health and opens the capture controls. +let capture_status_bar_item: vscode.StatusBarItem | undefined; +// Timer used to refresh capture status from the running server. +let capture_status_timer: NodeJS.Timeout | undefined; +// Last capture settings snapshot used to audit user-visible setting changes +// without double-logging when a command and VS Code's configuration event both +// observe the same transition. +let lastCaptureSettings: StudySettings | undefined; + +// Simple classification of what the user is currently doing. `doc` means +// prose/documentation activity, whether in a Markdown/RST document or a +// CodeChat doc block; write events from the server provide the more precise +// doc-block classification when it is available. +type ActivityKind = "doc" | "code" | "other"; + +// Language IDs that we treat as "documentation" for the dissertation metrics. +// You can refine this later if you want. +const DOC_LANG_IDS = new Set([ + "markdown", + "plaintext", + "latex", + "restructuredtext", +]); + +// Track the last activity kind and when a reflective-writing (doc) session +// started. +let lastActivityKind: ActivityKind = "other"; +let docSessionStart: number | null = null; + +function optionalString(value: unknown): string | undefined { + return typeof value === "string" && value.trim().length > 0 + ? value.trim() + : undefined; +} + +function loadStudySettings(): StudySettings { + const config = vscode.workspace.getConfiguration("CodeChatEditor.Capture"); + return { + // Both capture settings default to false; persisted user values override + // these defaults after a student changes the Settings UI. + enabled: config.get(CAPTURE_RECORD_SETTING_NAME, false), + consentEnabled: config.get("ConsentEnabled", false), + participantId: optionalString(config.get("ParticipantId")) ?? "", + }; +} + +// Convert raw settings into the explicit four-row state table. Keeping this as +// a separate helper prevents callers from inventing their own partial rules. +function captureSettingsState(settings: StudySettings): CaptureSettingsState { + if (settings.consentEnabled && settings.enabled) { + return "recording"; + } + if (settings.consentEnabled) { + return "paused"; + } + if (settings.enabled) { + return "waitingForConsent"; + } + return "off"; +} + +// Compare complete settings snapshots so command-triggered changes and VS Code +// configuration notifications do not emit duplicate audit rows. +function captureSettingsEqual(a: StudySettings, b: StudySettings): boolean { + return ( + a.enabled === b.enabled && + a.consentEnabled === b.consentEnabled && + a.participantId === b.participantId + ); +} + +// Human-readable labels used in status-bar tooltips and QuickPick details. +function captureStateDescription(state: CaptureSettingsState): string { + switch (state) { + case "recording": + return "Capture records study events."; + case "paused": + return "Consent is retained, but recording is paused."; + case "waitingForConsent": + return "Capture waits for consent before recording."; + case "off": + return "Capture is off."; + } +} + +// Build the status bar text and tooltip from the same state table used for +// gating events. This keeps UI feedback and recording behavior aligned. +function captureSettingsStatus(settings: StudySettings): { + label: string; + tooltip: string; + state: CaptureSettingsState; +} { + const state = captureSettingsState(settings); + let label: string; + switch (state) { + case "recording": + label = "Capture: Recording"; + break; + case "paused": + label = "Capture: Paused"; + break; + case "waitingForConsent": + label = "Capture: Waiting for consent"; + break; + case "off": + label = "Capture: Off"; + break; + } + + return { + label, + state, + tooltip: [ + `Consent Enabled: ${settings.consentEnabled ? "On" : "Off"}`, + `Record Study Events: ${settings.enabled ? "On" : "Off"}`, + `State: ${captureStateDescription(state)}`, + ].join("\n"), + }; +} + +// Normal capture events are allowed only in the `recording` row. Audit and +// control events can bypass this through explicit send options. +function captureDisabledReason(settings: StudySettings): string | undefined { + const state = captureSettingsState(settings); + if (state !== "recording") { + return captureStateDescription(state); + } + return undefined; +} + +async function updateCaptureSetting( + name: string, + value: string | boolean, +): Promise { + const config = vscode.workspace.getConfiguration("CodeChatEditor.Capture"); + await config.update(name, value, vscode.ConfigurationTarget.Global); +} + +async function ensureParticipantId(): Promise { + const config = vscode.workspace.getConfiguration("CodeChatEditor.Capture"); + const existing = optionalString(config.get("ParticipantId")); + if (existing !== undefined) { + return existing; + } + + const generated = crypto.randomUUID(); + await config.update( + "ParticipantId", + generated, + vscode.ConfigurationTarget.Global, + ); + return generated; +} + +function hashText(value: string): string { + return crypto.createHash("sha256").update(value).digest("hex"); +} + +function buildFileFields( + filePath: string | undefined, +): Pick { + if (filePath === undefined) { + return { + language_id: vscode.window.activeTextEditor?.document.languageId, + }; + } + const document = get_document(filePath); + return { + file_hash: hashText(filePath), + language_id: document?.languageId, + }; +} + +function captureLog(message: string): void { + capture_output_channel?.appendLine( + `${new Date().toISOString()} ${message}`, + ); +} + +function capturePayloadSummary(payload: CaptureEventWire): string { + return [ + `type=${payload.event_type}`, + `event_id=${payload.event_id}`, + `sequence=${payload.sequence_number?.toString()}`, + `schema=${payload.schema_version}`, + `user_id=${payload.user_id}`, + `session_id=${payload.session_id}`, + `source=${payload.event_source}`, + `language=${payload.language_id ?? ""}`, + payload.file_hash ? `file_hash=${payload.file_hash}` : "", + ] + .filter((part) => part.length > 0) + .join(" "); +} + +function captureStatusSummary(status: CaptureStatus): string { + return [ + `state=${status.state}`, + `enabled=${status.enabled}`, + `queued=${status.queued_events}`, + `db=${status.persisted_events}`, + `fallback=${status.fallback_events}`, + `failed=${status.failed_events}`, + status.last_error ? `last_error=${status.last_error}` : "", + status.fallback_path ? `fallback_path=${status.fallback_path}` : "", + ] + .filter((part) => part.length > 0) + .join(" "); +} + +interface CaptureSendOptions { + // Permit audit/control events even when normal capture is paused or waiting + // for consent. + ignoreCaptureSettings?: boolean; + // Update server-side capture state without inserting this event into the DB. + controlOnly?: boolean; + // Explicit active flag carried to the server so it can enable/disable + // translation-generated write events. + captureActive?: boolean; + // Audit rows for consent being turned off still need the participant ID + // that existed before the setting changed. + userId?: string; +} + +// Helper to send a capture event to the Rust server. +async function sendCaptureEvent( + eventType: CaptureEventType, + filePath?: string, + data: CaptureEventData = {}, + options: CaptureSendOptions = {}, +): Promise { + const settings = loadStudySettings(); + const disabledReason = captureDisabledReason(settings); + // User activity events stop here unless both consent and recording are on. + if (!options.ignoreCaptureSettings && disabledReason !== undefined) { + captureLog(`capture skipped: ${eventType} (${disabledReason})`); + const status = captureSettingsStatus(settings); + updateCaptureStatusBar(status.label, status.tooltip); + return; + } + // Control-only messages may run after consent is off, so they must not + // generate a fresh participant ID. + const participantId = options.userId + ? options.userId + : options.controlOnly + ? settings.participantId || "capture_control" + : await ensureParticipantId(); + const fileFields = buildFileFields(filePath); + // The server uses `capture_active` to decide whether it may generate + // classified write_doc/write_code rows from translated edits. + const captureActive = + options.captureActive ?? + (eventType !== "session_end" && + captureSettingsState(settings) === "recording"); + const payload: CaptureEventWire = { + event_id: crypto.randomUUID(), + sequence_number: BigInt(++captureSequenceNumber), + schema_version: CAPTURE_SCHEMA_VERSION, + user_id: participantId, + session_id: CAPTURE_SESSION_ID, + event_source: CAPTURE_EVENT_SOURCE, + ...fileFields, + event_type: eventType, + client_timestamp_ms: BigInt(Date.now()), + client_tz_offset_min: new Date().getTimezoneOffset(), + data: { + ...data, + capture_active: captureActive, + // A control-only event updates the server's capture context but is + // intentionally not inserted into capture storage. + ...(options.controlOnly ? { capture_control_only: true } : {}), + }, + }; + + if (codeChatEditorServer === undefined) { + captureLog( + `capture skipped: ${capturePayloadSummary(payload)} (server not running)`, + ); + reportCaptureFailure("CodeChat server is not running"); + return; + } + if (!captureTransportReady) { + captureLog( + `capture skipped before server handshake: ${capturePayloadSummary(payload)}`, + ); + return; + } + + try { + const messageId = await codeChatEditorServer.sendCaptureEvent( + stringifyCapturePayload(payload), + ); + captureFailureLogged = false; + captureLog( + `${options.controlOnly ? "capture control queued" : "capture queued"} message_id=${messageId}: ${capturePayloadSummary(payload)}`, + ); + await refreshCaptureStatus(); + } catch (err) { + reportCaptureFailure(err instanceof Error ? err.message : String(err)); + } +} + +function stringifyCapturePayload(payload: CaptureEventWire): string { + return JSON.stringify(payload, (_key, value) => + typeof value === "bigint" ? Number(value) : value, + ); +} + +function reportCaptureFailure(message: string) { + capture_output_channel?.appendLine( + `${new Date().toISOString()} capture send failed: ${message}`, + ); + updateCaptureStatusBar("Capture: Error", message); + if (captureFailureLogged) { + return; + } + captureFailureLogged = true; + console.warn(`CodeChat capture event was not queued: ${message}`); +} + +function updateCaptureStatusBar(text: string, tooltip?: string) { + if (capture_status_bar_item === undefined) { + return; + } + capture_status_bar_item.text = text; + capture_status_bar_item.tooltip = tooltip; + capture_status_bar_item.show(); +} + +async function refreshCaptureStatus(): Promise { + const settings = loadStudySettings(); + const settingsStatus = captureSettingsStatus(settings); + // When the settings are not in the recording row, the settings state is the + // authoritative status regardless of the server's DB/fallback state. + if (settingsStatus.state !== "recording") { + updateCaptureStatusBar(settingsStatus.label, settingsStatus.tooltip); + return; + } + if (codeChatEditorServer === undefined) { + updateCaptureStatusBar( + "Capture: Waiting", + `${settingsStatus.tooltip}\nServer: CodeChat server is not running`, + ); + return; + } + + try { + const status = JSON.parse( + codeChatEditorServer.getCaptureStatus(), + ) as CaptureStatus; + let label: string; + switch (status.state) { + case "database": + label = "Capture: DB"; + break; + case "fallback": + label = "Capture: Fallback"; + break; + case "starting": + label = "Capture: Starting"; + break; + default: + label = "Capture: Off"; + break; + } + updateCaptureStatusBar( + label, + [ + settingsStatus.tooltip, + captureStatusSummary(status).split(" ").join("\n"), + ].join("\n"), + ); + } catch (err) { + updateCaptureStatusBar( + "Capture: Error", + err instanceof Error ? err.message : String(err), + ); + } +} + +// A status-bar QuickPick action. Each item owns the async work needed after the +// student chooses it, keeping the capture UI small and easy to scan. +interface CaptureStatusAction extends vscode.QuickPickItem { + run: () => Promise; +} + +function captureStatusDetails(): string { + const tooltip = capture_status_bar_item?.tooltip; + return typeof tooltip === "string" + ? tooltip + : (tooltip?.value ?? "Capture status unavailable"); +} + +async function setRecordStudyEvents(enabled: boolean): Promise { + // Save the previous settings before updating so the audit event can record + // exactly what changed. + const previousSettings = loadStudySettings(); + await updateCaptureSetting(CAPTURE_RECORD_SETTING_NAME, enabled); + await reconcileCaptureSettings( + "manage_capture_record_study_events", + previousSettings, + ); + + const updatedSettings = loadStudySettings(); + if (enabled && captureSettingsState(updatedSettings) === "recording") { + vscode.window.showInformationMessage( + "CodeChat capture is recording study events.", + ); + } else if (enabled) { + vscode.window.showInformationMessage( + "CodeChat capture is waiting for consent.", + ); + } else { + vscode.window.showInformationMessage( + "CodeChat capture recording is paused.", + ); + } +} + +async function setCaptureConsent(enabled: boolean): Promise { + // Save the previous settings before updating so the audit event can record + // consent transitions, including consent being turned off. + const previousSettings = loadStudySettings(); + + // Consent-on creates the pseudonymous participant ID up front, so the audit + // event and later study events use the same stable identifier. + if (enabled) { + await ensureParticipantId(); + } + await updateCaptureSetting("ConsentEnabled", enabled); + await reconcileCaptureSettings( + "manage_capture_consent_enabled", + previousSettings, + ); + + const updatedSettings = loadStudySettings(); + if (enabled && captureSettingsState(updatedSettings) === "recording") { + vscode.window.showInformationMessage( + "CodeChat capture consent is recorded and recording is on.", + ); + } else if (enabled) { + vscode.window.showInformationMessage( + "CodeChat capture consent is recorded.", + ); + } else { + vscode.window.showInformationMessage( + "CodeChat capture consent is off.", + ); + } +} + +async function giveConsentAndRecordStudyEvents(): Promise { + // This command intentionally changes both user-facing settings together, + // then lets the common reconcile path emit one combined audit event. + const previousSettings = loadStudySettings(); + + await ensureParticipantId(); + await updateCaptureSetting("ConsentEnabled", true); + await updateCaptureSetting(CAPTURE_RECORD_SETTING_NAME, true); + await reconcileCaptureSettings( + "manage_capture_give_consent_and_record", + previousSettings, + ); + vscode.window.showInformationMessage( + "CodeChat capture consent is recorded and recording is on.", + ); +} + +async function sendCaptureSettingsChangedEvent( + previous: StudySettings, + current: StudySettings, + changedBy: string, + filePath?: string, +): Promise { + // Only the consent and recording checkboxes are study-state transitions. + // Other capture settings, such as path hashing, should not create audit + // rows in the dissertation event stream. + const changedSettings: string[] = []; + if (previous.consentEnabled !== current.consentEnabled) { + changedSettings.push("ConsentEnabled"); + } + if (previous.enabled !== current.enabled) { + changedSettings.push(CAPTURE_RECORD_SETTING_NAME); + } + if (changedSettings.length === 0) { + return; + } + + // Prefer the current participant ID, but fall back to the previous value so + // turning consent off can still be attributed to the participant who opted + // out. + let participantId = current.participantId || previous.participantId; + if (current.consentEnabled && participantId.length === 0) { + participantId = await ensureParticipantId(); + } + if (participantId.length === 0) { + captureLog( + `capture settings change skipped: ${changedSettings.join(",")} (no participant id)`, + ); + return; + } + + const previousState = captureSettingsState(previous); + const currentState = captureSettingsState(current); + // This audit event is deliberately allowed even when capture is no longer + // active, because the transition itself is analytically important. + await sendCaptureEvent( + "capture_settings_changed", + filePath, + { + changed_by: changedBy, + changed_settings: changedSettings, + previous_state: previousState, + new_state: currentState, + previous_consent_enabled: previous.consentEnabled, + new_consent_enabled: current.consentEnabled, + previous_record_study_events: previous.enabled, + new_record_study_events: current.enabled, + capture_active_before: previousState === "recording", + capture_active_after: currentState === "recording", + }, + { + ignoreCaptureSettings: true, + captureActive: currentState === "recording", + userId: participantId, + }, + ); +} + +async function reconcileCaptureSettings( + changedBy: string = "settings_ui", + previousSettings?: StudySettings, +): Promise { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + const settings = loadStudySettings(); + // The first reconciliation after activation uses the snapshot captured at + // activation; command callers may also provide the pre-change snapshot. + const previous = lastCaptureSettings ?? previousSettings; + + // Commands update settings and VS Code then fires a configuration event. + // This guard keeps the DB audit trail to one row per actual transition. + if ( + lastCaptureSettings !== undefined && + captureSettingsEqual(lastCaptureSettings, settings) + ) { + await refreshCaptureStatus(); + return; + } + + // Write the audit row before changing the server active flag, so turning + // capture off records the transition but not any later edit events. + if (previous !== undefined) { + await sendCaptureSettingsChangedEvent( + previous, + settings, + changedBy, + filePath, + ); + } + + const updatedSettings = loadStudySettings(); + // Recording starts only when both checkboxes are on. + if (captureSettingsState(updatedSettings) === "recording") { + await startExtensionCaptureSession(filePath); + } else if ( + // If capture was active before this transition, send a control-only stop + // so the Rust translation layer stops emitting write_doc/write_code + // events from stale context. + extensionCaptureSessionStarted || + (previous !== undefined && + captureSettingsState(previous) === "recording") + ) { + await endExtensionCaptureSession(filePath, changedBy, { + controlOnly: true, + }); + } else { + // A stop-control is harmless when a server is present and keeps the + // server context inactive after settings-only transitions. + await sendCaptureStopControl(filePath, changedBy); + } + + // Refresh the dedupe snapshot after any participant ID generation or audit + // send that may have touched settings. + lastCaptureSettings = loadStudySettings(); + await refreshCaptureStatus(); +} + +async function copyParticipantId(): Promise { + const participantId = await ensureParticipantId(); + await vscode.env.clipboard.writeText(participantId); + vscode.window.showInformationMessage( + "CodeChat capture participant ID copied.", + ); +} + +async function showCaptureStatus(): Promise { + await refreshCaptureStatus(); + const settings = loadStudySettings(); + const settingsStatus = captureSettingsStatus(settings); + // The QuickPick exposes the same two independent switches as Settings, plus + // one convenience action that turns both on at once. + const actions: CaptureStatusAction[] = [ + { + label: "Show Current Capture State", + description: captureStateDescription(settingsStatus.state), + detail: settingsStatus.tooltip, + run: async () => { + captureLog(`capture status: ${settingsStatus.tooltip}`); + vscode.window.showInformationMessage(settingsStatus.tooltip); + }, + }, + ]; + + if (!settings.consentEnabled || !settings.enabled) { + actions.push({ + label: "Give Consent and Record Study Events", + description: "Turn both capture settings on.", + run: giveConsentAndRecordStudyEvents, + }); + } + + actions.push({ + label: settings.consentEnabled ? "Turn Consent Off" : "Turn Consent On", + description: settings.consentEnabled + ? "Stop recording if active; keep the recording setting unchanged." + : "Record participant consent; keep the recording setting unchanged.", + run: () => setCaptureConsent(!settings.consentEnabled), + }); + + actions.push({ + label: settings.enabled + ? "Turn Record Study Events Off" + : "Turn Record Study Events On", + description: settings.enabled + ? "Stop recording; keep consent unchanged." + : "Start recording only if consent is already on.", + run: () => setRecordStudyEvents(!settings.enabled), + }); + + actions.push( + { + label: "Copy Participant ID", + description: settings.participantId || "Generate a new UUID.", + run: copyParticipantId, + }, + { + label: "Show Capture Details", + description: captureStatusDetails().split("\n")[0], + run: async () => { + captureLog(`capture status: ${captureStatusDetails()}`); + vscode.window.showInformationMessage(captureStatusDetails()); + }, + }, + ); + + const selected = await vscode.window.showQuickPick(actions, { + placeHolder: "Manage CodeChat capture", + }); + if (selected !== undefined) { + await selected.run(); + } +} + +async function recordStudyLifecycleEvent( + eventType: CaptureEventType, +): Promise { + const active = vscode.window.activeTextEditor; + await sendCaptureEvent(eventType, active?.document.fileName, { + command: eventType, + languageId: active?.document.languageId, + }); +} + +function reflectionPromptText(languageId: string, prompt: string): string { + if (languageId === "markdown") { + return `\n\n### Reflection\n\n${prompt}\n\n`; + } + if (languageId === "restructuredtext") { + return `\n.. ${prompt}\n`; + } + if (languageId === "plaintext" || languageId === "latex") { + return `\n${prompt}\n`; + } + const commentPrefix = + languageId === "python" || + languageId === "shellscript" || + languageId === "powershell" || + languageId === "ruby" + ? "#" + : "//"; + return `\n${commentPrefix} Reflection: ${prompt}\n`; +} + +async function insertReflectionPrompt(): Promise { + const editor = vscode.window.activeTextEditor; + if (editor === undefined) { + vscode.window.showInformationMessage("Open a text editor first."); + return; + } + const prompt = await vscode.window.showQuickPick( + DEFAULT_REFLECTION_PROMPTS, + { + placeHolder: "Select a reflection prompt", + }, + ); + if (prompt === undefined) { + return; + } + + await editor.insertSnippet( + new vscode.SnippetString( + reflectionPromptText(editor.document.languageId, prompt), + ), + ); + await sendCaptureEvent( + "reflection_prompt_inserted", + editor.document.fileName, + { + prompt_hash: hashText(prompt), + prompt_length: prompt.length, + languageId: editor.document.languageId, + }, + ); +} + +async function startExtensionCaptureSession(filePath?: string) { + if (extensionCaptureSessionStarted) { + return; + } + if (captureDisabledReason(loadStudySettings()) !== undefined) { + return; + } + // Mark this before sending so recursive status refreshes do not emit a + // second session_start for the same extension session. + extensionCaptureSessionStarted = true; + await sendCaptureEvent("session_start", filePath, { + mode: "vscode_extension", + }); +} + +async function endExtensionCaptureSession( + filePath: string | undefined, + closedBy: string, + options: { controlOnly?: boolean } = {}, +): Promise { + if (!extensionCaptureSessionStarted) { + return; + } + if (options.controlOnly) { + // Consent/recording changes must stop server-side write classification + // without inserting a synthetic session_end row after the user opted + // out or paused recording. + docSessionStart = null; + await sendCaptureStopControl(filePath, closedBy); + extensionCaptureSessionStarted = false; + return; + } + await closeDocSession(filePath, closedBy); + await sendCaptureEvent("session_end", filePath, { + mode: "vscode_extension", + closed_by: closedBy, + }); + extensionCaptureSessionStarted = false; +} + +async function sendCaptureStopControl( + filePath: string | undefined, + closedBy: string, +): Promise { + if (codeChatEditorServer === undefined || !captureTransportReady) { + return; + } + // This message is sent through the normal capture channel so the server can + // clear its active capture context, but `capture_control_only` prevents it + // from becoming a DB row. + await sendCaptureEvent( + "session_end", + filePath, + { + mode: "vscode_extension", + closed_by: closedBy, + }, + { + ignoreCaptureSettings: true, + controlOnly: true, + captureActive: false, + }, + ); +} + +async function closeDocSession( + filePath: string | undefined, + closedBy: string, +): Promise { + if (docSessionStart === null) { + return; + } + + const durationMs = Date.now() - docSessionStart; + docSessionStart = null; + await sendCaptureEvent("doc_session", filePath, { + duration_ms: durationMs, + duration_seconds: durationMs / 1000.0, + closed_by: closedBy, + }); + await sendCaptureEvent("session_end", filePath, { + mode: "doc", + closed_by: closedBy, + }); +} + +// Update activity state and emit switch/doc-session events. Markdown/RST prose +// and CodeChat doc-block edits are both documentation activity for analysis; +// server-side write events classify CodeChat doc-block edits precisely, while +// this extension-side activity tracker uses the best cursor/file context +// available before translation. +function noteActivity(kind: ActivityKind, filePath?: string) { + const now = Date.now(); + + // Handle entering / leaving a "doc" session. + if (kind === "doc") { + if (docSessionStart === null) { + // Starting a new reflective-writing session. + docSessionStart = now; + sendCaptureEvent("session_start", filePath, { + mode: "doc", + }); + } + } else { + if (docSessionStart !== null) { + // Ending a reflective-writing session. + const durationMs = now - docSessionStart; + docSessionStart = null; + sendCaptureEvent("doc_session", filePath, { + duration_ms: durationMs, + duration_seconds: durationMs / 1000.0, + }); + sendCaptureEvent("session_end", filePath, { + mode: "doc", + }); + } + } + + // If we switched between doc and code, log a switch\_pane event. + const docOrCode = (k: ActivityKind) => k === "doc" || k === "code"; + if ( + docOrCode(lastActivityKind) && + docOrCode(kind) && + kind !== lastActivityKind + ) { + sendCaptureEvent("switch_pane", filePath, { + from: lastActivityKind, + to: kind, + }); + } + + lastActivityKind = kind; +} + // Activation/deactivation // ----------------------- // // This is invoked when the extension is activated. It either creates a new // CodeChat Editor Server instance or reveals the currently running one. export const activate = (context: vscode.ExtensionContext) => { + lastCaptureSettings = loadStudySettings(); + capture_output_channel = + vscode.window.createOutputChannel("CodeChat Capture"); + context.subscriptions.push(capture_output_channel); + capture_status_bar_item = vscode.window.createStatusBarItem( + vscode.StatusBarAlignment.Left, + 100, + ); + capture_status_bar_item.command = "extension.codeChatCaptureStatus"; + context.subscriptions.push(capture_status_bar_item); + capture_status_timer = setInterval(() => { + refreshCaptureStatus(); + }, 5000); + context.subscriptions.push({ + dispose: () => { + if (capture_status_timer !== undefined) { + clearInterval(capture_status_timer); + capture_status_timer = undefined; + } + }, + }); context.subscriptions.push( + vscode.workspace.onDidChangeConfiguration(async (event) => { + if (event.affectsConfiguration("CodeChatEditor.Capture")) { + await reconcileCaptureSettings("settings_ui"); + } + }), + ); + refreshCaptureStatus(); + + context.subscriptions.push( + vscode.commands.registerCommand( + "extension.codeChatCaptureStatus", + showCaptureStatus, + ), + vscode.commands.registerCommand( + "extension.codeChatInsertReflectionPrompt", + insertReflectionPrompt, + ), + // Study lifecycle commands are registered for optional study + // automation/keybindings, but they are not contributed to the Command + // Palette. Normal users should only see status and reflection commands. + vscode.commands.registerCommand( + "extension.codeChatCaptureTaskStart", + () => recordStudyLifecycleEvent("task_start"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureTaskSubmit", + () => recordStudyLifecycleEvent("task_submit"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureDebugTaskStart", + () => recordStudyLifecycleEvent("debug_task_start"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureDebugTaskSubmit", + () => recordStudyLifecycleEvent("debug_task_submit"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureHandoffStart", + () => recordStudyLifecycleEvent("handoff_start"), + ), + vscode.commands.registerCommand( + "extension.codeChatCaptureHandoffEnd", + () => recordStudyLifecycleEvent("handoff_end"), + ), vscode.commands.registerCommand( "extension.codeChatEditorDeactivate", deactivate, @@ -148,6 +1211,20 @@ export const activate = (context: vscode.ExtensionContext) => { event.reason }, ${format_struct(event.contentChanges)}.`, ); + + // CAPTURE: update session/switch state. The server + // classifies write_* events after parsing. + const doc = event.document; + const firstChange = event.contentChanges[0]; + const pos = firstChange.range.start; + const kind = classifyAtPosition(doc, pos); + + const filePath = doc.fileName; + + // Update our notion of current activity + doc + // session. + noteActivity(kind, filePath); + send_update(true); }), ); @@ -172,24 +1249,105 @@ export const activate = (context: vscode.ExtensionContext) => { ) { return; } + + // CAPTURE: update activity + possible + // switch\_pane/doc\_session. + const doc = event.document; + const pos = + event.selection?.active ?? + new vscode.Position(0, 0); + const kind = classifyAtPosition(doc, pos); + + const filePath = doc.fileName; + noteActivity(kind, filePath); + send_update(true); }), ); context.subscriptions.push( vscode.window.onDidChangeTextEditorSelection( - (_event) => { + (event) => { if (ignore_selection_change) { ignore_selection_change = false; return; } + console_log( "CodeChat Editor extension: sending updated cursor/scroll position.", ); + + // CAPTURE: treat a selection change as "activity" + // in this document. + const doc = event.textEditor.document; + const pos = + event.selections?.[0]?.active ?? + event.textEditor.selection.active; + const kind = classifyAtPosition(doc, pos); + const filePath = doc.fileName; + noteActivity(kind, filePath); + send_update(false); }, ), ); + + // CAPTURE: listen for file saves. + context.subscriptions.push( + vscode.workspace.onDidSaveTextDocument((doc) => { + sendCaptureEvent("save", doc.fileName, { + reason: "manual_save", + languageId: doc.languageId, + lineCount: doc.lineCount, + }); + }), + ); + + // CAPTURE: start and end of a debug/run session. + context.subscriptions.push( + vscode.debug.onDidStartDebugSession((session) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + sendCaptureEvent("run", filePath, { + sessionName: session.name, + sessionType: session.type, + }); + }), + vscode.debug.onDidTerminateDebugSession((session) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + sendCaptureEvent("run_end", filePath, { + sessionName: session.name, + sessionType: session.type, + }); + }), + ); + + // CAPTURE: start and end compile/build events via VS Code + // tasks. + context.subscriptions.push( + vscode.tasks.onDidStartTaskProcess((e) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + const task = e.execution.task; + sendCaptureEvent("compile", filePath, { + taskName: task.name, + taskSource: task.source, + definition: task.definition, + processId: e.processId, + }); + }), + vscode.tasks.onDidEndTaskProcess((e) => { + const active = vscode.window.activeTextEditor; + const filePath = active?.document.fileName; + const task = e.execution.task; + sendCaptureEvent("compile_end", filePath, { + taskName: task.name, + taskSource: task.source, + exitCode: e.exitCode, + }); + }), + ); } // Get the CodeChat Client's location from the VSCode @@ -277,6 +1435,10 @@ export const activate = (context: vscode.ExtensionContext) => { // Start the server. console_log("CodeChat Editor extension: starting server."); codeChatEditorServer = new CodeChatEditorServer(); + captureFailureLogged = false; + captureTransportReady = false; + extensionCaptureSessionStarted = false; + refreshCaptureStatus(); const hosted_in_ide = codechat_client_location === @@ -285,6 +1447,7 @@ export const activate = (context: vscode.ExtensionContext) => { `CodeChat Editor extension: sending message Opened(${hosted_in_ide}).`, ); await codeChatEditorServer.sendMessageOpened(hosted_in_ide); + // For the external browser, we can immediately send the // `CurrentFile` message. For the WebView, we must first wait to // receive the HTML for the WebView (the `ClientHtml` message). @@ -292,6 +1455,11 @@ export const activate = (context: vscode.ExtensionContext) => { codechat_client_location === CodeChatEditorClientLocation.browser ) { + captureTransportReady = true; + const active = vscode.window.activeTextEditor; + await startExtensionCaptureSession( + active?.document.fileName, + ); send_update(false); } @@ -301,6 +1469,7 @@ export const activate = (context: vscode.ExtensionContext) => { console_log("CodeChat Editor extension: queue closed."); break; } + // Parse the data into a message. const { id, message } = JSON.parse( message_raw, @@ -335,16 +1504,19 @@ export const activate = (context: vscode.ExtensionContext) => { } if (current_update.contents !== undefined) { const source = current_update.contents.source; + // This will produce a change event, which we'll // ignore. The change may also produce a // selection change, which should also be // ignored. ignore_text_document_change = true; ignore_selection_change = true; + // Use a workspace edit, since calls to // `TextEditor.edit` must be made to the active // editor only. const wse = new vscode.WorkspaceEdit(); + // Is this plain text, or a diff? if ("Plain" in source) { wse.replace( @@ -361,6 +1533,7 @@ export const activate = (context: vscode.ExtensionContext) => { ); } else { assert("Diff" in source); + // If this diff was not made against the // text we currently have, reject it. if (source.Diff.version !== version) { @@ -380,8 +1553,8 @@ export const activate = (context: vscode.ExtensionContext) => { } const diffs = source.Diff.doc; for (const diff of diffs) { - // Convert from character offsets from the - // beginning of the document to a + // Convert from character offsets from + // the beginning of the document to a // `Position` (line, then offset on that // line) needed by VSCode. const from = doc.positionAt(diff.from); @@ -415,11 +1588,12 @@ export const activate = (context: vscode.ExtensionContext) => { // Update the cursor and scroll position if // provided. const editor = get_text_editor(doc); + const scroll_line = current_update.scroll_position; if (scroll_line !== undefined && editor) { - // Don't set `ignore_scroll_position` here, - // since `revealRange` doesn't change the - // editor's text selection. + // Don't set `ignore_selection_change` here: + // `revealRange` doesn't change the editor's + // text selection. const scroll_position = new vscode.Position( // The VSCode line is zero-based; the // CodeMirror line is one-based. @@ -439,9 +1613,15 @@ export const activate = (context: vscode.ExtensionContext) => { const cursor_position = current_update.cursor_position; - if (cursor_position !== undefined && editor) { - assert("Line" in cursor_position); - const cursor_line = cursor_position.Line; + if ( + cursor_position !== undefined && + typeof cursor_position === "object" && + "Line" in cursor_position && + editor + ) { + const cursor_line = ( + cursor_position as { Line: number } + ).Line; ignore_selection_change = true; const vscode_cursor_position = new vscode.Position( @@ -509,19 +1689,13 @@ export const activate = (context: vscode.ExtensionContext) => { .executeCommand( "vscode.open", vscode.Uri.file(current_file), - { - viewColumn: - current_editor?.viewColumn, - }, + { viewColumn: current_editor?.viewColumn }, ) .then( async () => await sendResult(id), async (reason) => await sendResult(id, { - OpenFileFailed: [ - current_file, - reason, - ], + OpenFileFailed: [current_file, reason], }), ); */ @@ -595,6 +1769,11 @@ export const activate = (context: vscode.ExtensionContext) => { assert(webview_panel !== undefined); webview_panel.webview.html = client_html; await sendResult(id); + captureTransportReady = true; + const active = vscode.window.activeTextEditor; + await startExtensionCaptureSession( + active?.document.fileName, + ); // Now that the Client is loaded, send the editor's // current file to the server. send_update(false); @@ -603,9 +1782,7 @@ export const activate = (context: vscode.ExtensionContext) => { default: console.error( - `Unhandled message ${key}(${format_struct( - value, - )}`, + `Unhandled message ${key}(${format_struct(value)}`, ); break; } @@ -618,6 +1795,13 @@ export const activate = (context: vscode.ExtensionContext) => { // On deactivation, close everything down. export const deactivate = async () => { console_log("CodeChat Editor extension: deactivating."); + + const active = vscode.window.activeTextEditor; + await endExtensionCaptureSession( + active?.document.fileName, + "extension_deactivate", + ); + await stop_client(); webview_panel?.dispose(); console_log("CodeChat Editor extension: deactivated."); @@ -640,7 +1824,9 @@ const format_struct = (complex_data_structure: any): string => const sendResult = async (id: number, result?: ResultErrTypes) => { assert(codeChatEditorServer); console_log( - `CodeChat Editor extension: sending Result(id = ${id}, ${format_struct(result)}).`, + `CodeChat Editor extension: sending Result(id = ${id}, ${format_struct( + result, + )}).`, ); try { await codeChatEditorServer.sendResult( @@ -701,13 +1887,17 @@ const send_update = (this_is_dirty: boolean) => { const scroll_position = current_editor!.visibleRanges[0].start.line + 1; const file_path = current_editor!.document.fileName; + // Send contents only if necessary. const option_contents: null | [string, number] = is_dirty ? [current_editor!.document.getText(), (version = rand())] : null; is_dirty = false; + console_log( - `CodeChat Editor extension: sending Update(${file_path}, ${cursor_position}, ${scroll_position}, ${format_struct(option_contents)})`, + `CodeChat Editor extension: sending Update(${file_path}, ${cursor_position}, ${scroll_position}, ${format_struct( + option_contents, + )})`, ); await codeChatEditorServer!.sendMessageUpdatePlain( file_path, @@ -724,14 +1914,19 @@ const send_update = (this_is_dirty: boolean) => { // well. const stop_client = async () => { console_log("CodeChat Editor extension: stopping client."); + const active = vscode.window.activeTextEditor; + await endExtensionCaptureSession( + active?.document.fileName, + "client_stopped", + ); if (codeChatEditorServer !== undefined) { console_log("CodeChat Editor extension: stopping server."); await codeChatEditorServer.stopServer(); codeChatEditorServer = undefined; } + captureTransportReady = false; + await refreshCaptureStatus(); - // Shut the timer down after the client is undefined, to ensure it can't be - // started again by a call to `start_render()`. if (idle_timer !== undefined) { clearTimeout(idle_timer); idle_timer = undefined; @@ -748,7 +1943,6 @@ const show_error = (message: string) => { } console.error(`CodeChat Editor extension: ${message}`); if (webview_panel !== undefined) { - // If the panel was displaying other content, reset it for errors. if ( !webview_panel.webview.html.startsWith("

CodeChat Editor

") ) { diff --git a/extensions/VSCode/src/lib.rs b/extensions/VSCode/src/lib.rs index ceec586a..aa8169fa 100644 --- a/extensions/VSCode/src/lib.rs +++ b/extensions/VSCode/src/lib.rs @@ -80,6 +80,19 @@ impl CodeChatEditorServer { self.0.send_message_opened(hosted_in_ide).await } + #[napi] + pub async fn send_capture_event(&self, capture_event_json: String) -> std::io::Result { + let capture_event = serde_json::from_str(&capture_event_json) + .map_err(|err| std::io::Error::other(err.to_string()))?; + self.0.send_capture_event(capture_event).await + } + + #[napi] + pub fn get_capture_status(&self) -> Result { + serde_json::to_string(&self.0.capture_status()) + .map_err(|err| Error::new(Status::GenericFailure, err.to_string())) + } + #[napi] pub async fn send_message_current_file(&self, url: String) -> std::io::Result { self.0.send_message_current_file(url).await diff --git a/server/Cargo.lock b/server/Cargo.lock index 27c1c786..c7e3d2a6 100644 --- a/server/Cargo.lock +++ b/server/Cargo.lock @@ -777,6 +777,7 @@ dependencies = [ "regex", "serde", "serde_json", + "sha2 0.11.0", "test_utils", "thirtyfour", "thiserror", diff --git a/server/Cargo.toml b/server/Cargo.toml index 7c4cab2b..5120fe74 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -94,6 +94,7 @@ rand = "0.10" regex = "1" serde = { version = "1", features = ["derive"] } serde_json = "1" +sha2 = "0.11" test_utils = { path = "../test_utils" } thiserror = "2.0.12" tokio = { version = "1", features = ["full"] } diff --git a/server/log4rs.yml b/server/log4rs.yml index 544068f2..d534ba2a 100644 --- a/server/log4rs.yml +++ b/server/log4rs.yml @@ -40,7 +40,7 @@ loggers: level: warn root: - level: info + level: debug appenders: - console_appender - file_appender \ No newline at end of file diff --git a/server/scripts/capture_events_schema.sql b/server/scripts/capture_events_schema.sql new file mode 100644 index 00000000..dece1474 --- /dev/null +++ b/server/scripts/capture_events_schema.sql @@ -0,0 +1,183 @@ +-- CodeChat capture event schema for dissertation analysis. +-- +-- This script updates an existing legacy `events` table to the lean capture +-- schema used for dissertation telemetry. It converts `timestamp` and `data` to +-- analysis-friendly PostgreSQL types and backfills typed telemetry from +-- older JSON payloads where possible. New capture code writes known telemetry +-- metadata to first-class columns and reserves `data` for event-specific +-- details. Study metadata such as course, group, +-- assignment, condition, and task is intentionally omitted: those values are +-- joined during analysis from researcher-managed participant/date mappings. + +BEGIN; + +CREATE TABLE IF NOT EXISTS public.events ( + id BIGSERIAL PRIMARY KEY, + event_id TEXT, + sequence_number BIGINT, + schema_version INTEGER, + user_id TEXT NOT NULL, + session_id TEXT, + event_source TEXT, + language_id TEXT, + file_hash TEXT, + event_type TEXT NOT NULL, + "timestamp" TIMESTAMPTZ NOT NULL DEFAULT now(), + client_timestamp_ms BIGINT, + client_tz_offset_min INTEGER, + data JSONB NOT NULL DEFAULT '{}'::jsonb, + inserted_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS event_id TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS sequence_number BIGINT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS schema_version INTEGER; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS session_id TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS event_source TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS language_id TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS file_hash TEXT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS client_timestamp_ms BIGINT; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS client_tz_offset_min INTEGER; +ALTER TABLE public.events ADD COLUMN IF NOT EXISTS inserted_at TIMESTAMPTZ NOT NULL DEFAULT now(); + +ALTER TABLE public.events DROP COLUMN IF EXISTS assignment_id; +ALTER TABLE public.events DROP COLUMN IF EXISTS group_id; +ALTER TABLE public.events DROP COLUMN IF EXISTS condition; +ALTER TABLE public.events DROP COLUMN IF EXISTS course_id; +ALTER TABLE public.events DROP COLUMN IF EXISTS task_id; +ALTER TABLE public.events DROP COLUMN IF EXISTS capture_mode; +ALTER TABLE public.events DROP COLUMN IF EXISTS file_path; +ALTER TABLE public.events DROP COLUMN IF EXISTS path_privacy; +ALTER TABLE public.events DROP COLUMN IF EXISTS server_timestamp_ms; + +DO $$ +DECLARE + current_type TEXT; +BEGIN + SELECT data_type INTO current_type + FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'events' + AND column_name = 'timestamp'; + + IF current_type IS DISTINCT FROM 'timestamp with time zone' THEN + ALTER TABLE public.events + ALTER COLUMN "timestamp" TYPE TIMESTAMPTZ + USING COALESCE(NULLIF("timestamp"::text, '')::timestamptz, now()); + END IF; +END $$; + +DO $$ +DECLARE + current_type TEXT; +BEGIN + SELECT data_type INTO current_type + FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'events' + AND column_name = 'data'; + + IF current_type IS DISTINCT FROM 'jsonb' THEN + ALTER TABLE public.events + ALTER COLUMN data TYPE JSONB + USING CASE + WHEN data IS NULL OR btrim(data::text) = '' THEN '{}'::jsonb + ELSE data::jsonb + END; + END IF; +END $$; + +UPDATE public.events +SET data = '{}'::jsonb +WHERE data IS NULL; + +ALTER TABLE public.events ALTER COLUMN data SET DEFAULT '{}'::jsonb; +ALTER TABLE public.events ALTER COLUMN data SET NOT NULL; +ALTER TABLE public.events ALTER COLUMN "timestamp" SET DEFAULT now(); +ALTER TABLE public.events ALTER COLUMN "timestamp" SET NOT NULL; + +UPDATE public.events +SET + event_id = COALESCE(event_id, NULLIF(data->>'event_id', '')), + sequence_number = COALESCE( + sequence_number, + CASE + WHEN data->>'sequence_number' ~ '^-?[0-9]+$' + THEN (data->>'sequence_number')::bigint + END + ), + schema_version = COALESCE( + schema_version, + CASE + WHEN data->>'schema_version' ~ '^-?[0-9]+$' + THEN (data->>'schema_version')::integer + END + ), + session_id = COALESCE(session_id, NULLIF(data->>'session_id', '')), + event_source = COALESCE(event_source, NULLIF(data->>'event_source', '')), + language_id = COALESCE( + language_id, + NULLIF(data->>'language_id', ''), + NULLIF(data->>'languageId', '') + ), + file_hash = COALESCE(file_hash, NULLIF(data->>'file_hash', '')), + client_timestamp_ms = COALESCE( + client_timestamp_ms, + CASE + WHEN data->>'client_timestamp_ms' ~ '^-?[0-9]+$' + THEN (data->>'client_timestamp_ms')::bigint + END + ), + client_tz_offset_min = COALESCE( + client_tz_offset_min, + CASE + WHEN data->>'client_tz_offset_min' ~ '^-?[0-9]+$' + THEN (data->>'client_tz_offset_min')::integer + END + ); + +CREATE INDEX IF NOT EXISTS events_timestamp_idx + ON public.events ("timestamp"); + +CREATE INDEX IF NOT EXISTS events_type_timestamp_idx + ON public.events (event_type, "timestamp"); + +CREATE INDEX IF NOT EXISTS events_participant_session_idx + ON public.events (user_id, session_id); + +CREATE INDEX IF NOT EXISTS events_file_hash_idx + ON public.events (file_hash) + WHERE file_hash IS NOT NULL; + +CREATE INDEX IF NOT EXISTS events_event_id_idx + ON public.events (event_id) + WHERE event_id IS NOT NULL; + +CREATE INDEX IF NOT EXISTS events_data_gin_idx + ON public.events USING GIN (data); + +COMMENT ON TABLE public.events IS + 'CodeChat dissertation capture events. Course, group, assignment, condition, and task context are joined during analysis from participant/date mappings.'; +COMMENT ON COLUMN public.events.user_id IS 'Pseudonymous participant UUID generated or supplied by the VS Code extension.'; +COMMENT ON COLUMN public.events.session_id IS 'Capture session UUID emitted by the VS Code extension.'; +COMMENT ON COLUMN public.events.file_hash IS 'SHA-256 hash of the local file path; raw local paths are not stored.'; +COMMENT ON COLUMN public.events."timestamp" IS 'Server receive/record timestamp in UTC.'; +COMMENT ON COLUMN public.events.client_timestamp_ms IS 'Optional client-observed event timestamp in milliseconds since Unix epoch.'; +COMMENT ON COLUMN public.events.data IS 'Event-specific JSON payload. Known telemetry metadata lives in typed columns.'; + +-- Least-privilege deployment guidance: +-- students or classroom machines should use a dedicated writer account, not a +-- database owner or administrator account. After replacing the placeholder +-- password/database/user names, a database administrator can grant only the +-- permissions needed for capture inserts: +-- +-- CREATE ROLE codechat_capture_writer LOGIN PASSWORD 'replace-with-secret'; +-- GRANT CONNECT ON DATABASE codechat_capture TO codechat_capture_writer; +-- GRANT USAGE ON SCHEMA public TO codechat_capture_writer; +-- GRANT INSERT ON public.events TO codechat_capture_writer; +-- GRANT USAGE ON SEQUENCE public.events_id_seq TO codechat_capture_writer; +-- +-- Do not grant SELECT, UPDATE, DELETE, CREATE, or ownership privileges to the +-- writer account used in `capture_config.json`. + +COMMIT; diff --git a/server/src/capture.rs b/server/src/capture.rs index 3f8f7c15..d3a8275e 100644 --- a/server/src/capture.rs +++ b/server/src/capture.rs @@ -13,227 +13,1222 @@ // You should have received a copy of the GNU General Public License along with // the CodeChat Editor. If not, see // [http://www.gnu.org/licenses](http://www.gnu.org/licenses). -/// # `Capture.rs` -- Capture CodeChat Editor Events -// ## Submodules + +// `capture.rs` -- Capture CodeChat Editor Events +// ============================================================================ +// +// This module provides an asynchronous event capture facility backed by a +// PostgreSQL database. It is designed to support the dissertation study by +// recording process-level data such as: +// +// * Frequency and timing of writing entries +// * Edits to documentation and code +// * Switches between documentation and coding activity +// * Duration of engagement with reflective writing +// * Save, compile, and run events +// +// Events are sent from the client (browser and/or VS Code extension) to the +// server as JSON. The server enqueues events into an asynchronous worker which +// performs batched inserts into the `events` table. +// +// Database schema +// ---------------------------------------------------------------------------- // -// ## Imports +// The canonical schema and migration DDL lives in +// `server/scripts/capture_events_schema.sql`. The important analysis columns +// are: // -// Standard library -use indoc::indoc; -use std::fs; -use std::io; -use std::path::Path; -use std::sync::Arc; - -// Third-party -use chrono::Local; -use log::{error, info}; +// ```sql +// event_id, sequence_number, schema_version, +// user_id, session_id, event_source, language_id, file_hash, event_type, +// timestamp, client_timestamp_ms, client_tz_offset_min, data +// ``` +// +// * `user_id` – pseudonymous participant UUID. Course, group, assignment, and +// study condition are intentionally joined later from researcher-managed +// participant/date mappings instead of being configured by students. +// * `session_id`, `event_id`, `sequence_number`, `schema_version` – event +// integrity and versioning metadata. +// * `file_hash` – privacy-preserving SHA-256 hash of the local file path. +// * `event_type` – coarse event type (see `CaptureEventType` below). +// * `timestamp` – server receive/record timestamp (in UTC). +// * `client_timestamp_ms` – optional client-observed event time for ordering +// and latency analysis. +// * `data` – JSONB payload with event-specific details. + +use std::{ + env, + fs::{self, OpenOptions}, + io::{self, Write}, + path::{Path, PathBuf}, + process, + sync::atomic::{AtomicU64, Ordering}, + sync::{Arc, Mutex}, + thread, +}; + +use chrono::{DateTime, Utc}; +use log::{debug, error, info, warn}; use serde::{Deserialize, Serialize}; -use tokio::sync::Mutex; +use sha2::{Digest, Sha256}; +use std::error::Error; +use tokio::sync::mpsc; use tokio_postgres::{Client, NoTls}; +use ts_rs::TS; -// Local +static NEXT_CAPTURE_EVENT_ID: AtomicU64 = AtomicU64::new(1); -/* ## The Event Structure: +/// Canonical event types. Keep the serialized strings stable for analysis. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export)] +pub enum CaptureEventType { + /// Edit to documentation/prose. In CodeChat files this means doc blocks; + /// fenced or embedded code content is classified as `WriteCode`. + WriteDoc, + /// Edit to executable source code, including code inside CodeChat blocks. + WriteCode, + /// Editor activity moved between documentation and code contexts. + SwitchPane, + /// Duration summary for a documentation/prose activity interval. + DocSession, + /// File save observed by the editor. + Save, + /// Compile/build task started. + Compile, + /// Debug/run session started. + Run, + /// Capture or activity session started. + SessionStart, + /// Capture or activity session ended. + SessionEnd, + /// Consent or recording settings changed. + CaptureSettingsChanged, + /// Compile/build task ended. + CompileEnd, + /// Debug/run session ended. + RunEnd, + /// Study task started by an external study workflow. + TaskStart, + /// Study task submitted by an external study workflow. + TaskSubmit, + /// Debugging study task started by an external study workflow. + DebugTaskStart, + /// Debugging study task submitted by an external study workflow. + DebugTaskSubmit, + /// Collaboration handoff interval started. + HandoffStart, + /// Collaboration handoff interval ended. + HandoffEnd, + /// A built-in reflection prompt was inserted into the active editor. + ReflectionPromptInserted, +} - The `Event` struct represents an event to be stored in the database. +impl CaptureEventType { + pub const fn as_str(self) -> &'static str { + match self { + Self::WriteDoc => "write_doc", + Self::WriteCode => "write_code", + Self::SwitchPane => "switch_pane", + Self::DocSession => "doc_session", + Self::Save => "save", + Self::Compile => "compile", + Self::Run => "run", + Self::SessionStart => "session_start", + Self::SessionEnd => "session_end", + Self::CaptureSettingsChanged => "capture_settings_changed", + Self::CompileEnd => "compile_end", + Self::RunEnd => "run_end", + Self::TaskStart => "task_start", + Self::TaskSubmit => "task_submit", + Self::DebugTaskStart => "debug_task_start", + Self::DebugTaskSubmit => "debug_task_submit", + Self::HandoffStart => "handoff_start", + Self::HandoffEnd => "handoff_end", + Self::ReflectionPromptInserted => "reflection_prompt_inserted", + } + } +} - Fields: - `user_id`: The ID of the user associated with the event. - - `event_type`: The type of event (e.g., "keystroke", "file_open"). - `data`: - Optional additional data associated with the event. +impl std::fmt::Display for CaptureEventType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} - ### Example +/// Hash a local file path before it enters capture storage. The hash is stable +/// enough to group edits to the same file while avoiding raw path collection. +pub fn hash_capture_path(path: &str) -> String { + Sha256::digest(path.as_bytes()) + .iter() + .map(|byte| format!("{byte:02x}")) + .collect() +} - let event = Event { user_id: "user123".to_string(), event_type: - "keystroke".to_string(), data: Some("Pressed key A".to_string()), }; -*/ +/// Configuration used to construct the PostgreSQL connection string. +/// +/// You can populate this from a JSON file or environment variables in +/// `main.rs`; this module stays agnostic. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CaptureConfig { + /// PostgreSQL host name or address. + pub host: String, + /// Optional PostgreSQL port. Uses libpq's default when omitted. + #[serde(default)] + pub port: Option, + /// PostgreSQL user name. + pub user: String, + /// PostgreSQL password. Never included in redacted summaries. + pub password: String, + /// PostgreSQL database name. + pub dbname: String, + /// Optional: application-level identifier for this deployment (e.g., course + /// code or semester). Not stored in the DB directly; callers can embed this + /// in `data` if desired. + #[serde(default)] + pub app_id: Option, + /// Local JSONL file used when PostgreSQL is unavailable. + #[serde(default)] + pub fallback_path: Option, +} -#[derive(Deserialize, Debug)] -pub struct Event { - pub user_id: String, - pub event_type: String, - pub data: Option, +impl CaptureConfig { + /// Validate capture configuration before starting the worker. This catches + /// invalid setup early and avoids ambiguous "random port" behavior. + pub fn validate(&self) -> Result<(), String> { + if self.port == Some(0) { + return Err("capture database port must be between 1 and 65535".to_string()); + } + Ok(()) + } + + /// Build a libpq-style connection string. + pub fn to_conn_str(&self) -> String { + let mut parts = vec![ + format!("host={}", self.host), + format!("user={}", self.user), + format!("password={}", self.password), + format!("dbname={}", self.dbname), + ]; + if let Some(port) = self.port { + parts.push(format!("port={port}")); + } + parts.join(" ") + } + + /// Return a human-readable summary that never includes the password. + pub fn redacted_summary(&self) -> String { + format!( + "host={}, port={:?}, user={}, dbname={}, app_id={:?}, fallback_path={:?}", + self.host, self.port, self.user, self.dbname, self.app_id, self.fallback_path + ) + } + + /// Build capture configuration from environment variables. If no capture + /// host is configured, return `Ok(None)` so callers can fall back to a file. + pub fn from_env() -> Result, String> { + let Some(host) = env_var_trimmed("CODECHAT_CAPTURE_HOST") else { + return Ok(None); + }; + + let port = match env_var_trimmed("CODECHAT_CAPTURE_PORT") { + Some(port) => Some(port.parse::().map_err(|err| { + format!("CODECHAT_CAPTURE_PORT must be a valid port number: {err}") + })?), + None => None, + }; + + let cfg = Self { + host, + port, + user: required_env_var("CODECHAT_CAPTURE_USER")?, + password: required_env_var("CODECHAT_CAPTURE_PASSWORD")?, + dbname: required_env_var("CODECHAT_CAPTURE_DBNAME")?, + app_id: env_var_trimmed("CODECHAT_CAPTURE_APP_ID"), + fallback_path: env_var_trimmed("CODECHAT_CAPTURE_FALLBACK_PATH").map(PathBuf::from), + }; + cfg.validate()?; + Ok(Some(cfg)) + } } -/* - ## The Config Structure: +/// Load capture configuration from environment variables or the repo/runtime +/// `capture_config.json`. +/// +/// Environment variables take precedence so deployment can inject secrets +/// without writing them to disk. Local development and student-facing setup use +/// the single config file at `root_path/capture_config.json`. +pub fn load_capture_config(root_path: &Path) -> Option { + match CaptureConfig::from_env() { + Ok(Some(cfg)) => return Some(with_default_capture_fallback_path(cfg, root_path)), + Ok(None) => {} + Err(err) => { + warn!("Capture: invalid environment configuration: {err}"); + return None; + } + } - The `Config` struct represents the database connection parameters read from - `config.json`. + let config_path = root_path.join("capture_config.json"); - Fields: - `db_host`: The hostname or IP address of the database server. - - `db_user`: The username for the database connection. - `db_password`: The - password for the database connection. - `db_name`: The name of the database. + match fs::read_to_string(&config_path) { + Ok(json) => match serde_json::from_str::(&json) { + Ok(cfg) => match cfg.validate() { + Ok(()) => Some(with_default_capture_fallback_path(cfg, root_path)), + Err(err) => { + warn!("Capture: invalid configuration in {config_path:?}: {err}"); + None + } + }, + Err(err) => { + warn!("Capture: invalid JSON in {config_path:?}: {err}"); + None + } + }, + Err(err) => { + info!( + "Capture: disabled (no CODECHAT_CAPTURE_* env and no readable config at {config_path:?}: {err})" + ); + None + } + } +} - let config = Config { db_host: "localhost".to_string(), db_user: - "your_db_user".to_string(), db_password: "your_db_password".to_string(), - db_name: "your_db_name".to_string(), }; -*/ +/// Normalize the fallback JSONL path to the runtime root when a relative path +/// or no path is provided. +pub fn with_default_capture_fallback_path( + mut cfg: CaptureConfig, + root_path: &Path, +) -> CaptureConfig { + match &cfg.fallback_path { + Some(path) if path.is_relative() => { + cfg.fallback_path = Some(root_path.join(path)); + } + Some(_) => {} + None => { + cfg.fallback_path = Some(root_path.join("capture-events-fallback.jsonl")); + } + } + cfg +} -#[derive(Deserialize, Serialize, Debug)] -pub struct Config { - pub db_ip: String, - pub db_user: String, - pub db_password: String, - pub db_name: String, +fn env_var_trimmed(name: &str) -> Option { + env::var(name) + .ok() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) } -/* +fn required_env_var(name: &str) -> Result { + env_var_trimmed(name).ok_or_else(|| format!("{name} is required when capture env is used")) +} - ## The EventCapture Structure: +/// Known capture worker states reported to the VS Code status UI. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export)] +pub enum CaptureState { + /// Capture is not configured or the worker is unavailable. + Disabled, + /// Capture worker is starting and attempting the first database connection. + Starting, + /// Events are being persisted to PostgreSQL. + Database, + /// Events are being written to local JSONL fallback storage. + Fallback, +} - The `EventCapture` struct provides methods to interact with the database. It -holds a `tokio_postgres::Client` for database operations. +/// Capture worker health exposed to the VS Code status item. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] +#[ts(export)] +pub struct CaptureStatus { + /// True when the capture worker is configured and accepting events. + pub enabled: bool, + /// Current worker state. + pub state: CaptureState, + /// Number of events accepted into the worker queue. + pub queued_events: u64, + /// Number of events inserted into PostgreSQL. + pub persisted_events: u64, + /// Number of events written to the local JSONL fallback. + pub fallback_events: u64, + /// Number of failed enqueue or fallback-write attempts. + pub failed_events: u64, + /// Most recent capture error, if one is known. + pub last_error: Option, + /// Local JSONL fallback path when fallback capture is configured. + pub fallback_path: Option, +} -### Usage Example +impl CaptureStatus { + pub fn disabled() -> Self { + Self { + enabled: false, + state: CaptureState::Disabled, + queued_events: 0, + persisted_events: 0, + fallback_events: 0, + failed_events: 0, + last_error: None, + fallback_path: None, + } + } -#\[tokio::main\] async fn main() -> Result<(), Box> { + fn starting(fallback_path: Option) -> Self { + Self { + enabled: true, + state: CaptureState::Starting, + queued_events: 0, + persisted_events: 0, + fallback_events: 0, + failed_events: 0, + last_error: None, + fallback_path, + } + } +} -``` - // Create an instance of EventCapture using the configuration file - let event_capture = EventCapture::new("config.json").await?; +/// The in-memory representation of a single capture event. +#[derive(Debug, Clone)] +pub struct CaptureEvent { + /// Globally unique event identifier, generated by the client or server. + pub event_id: Option, + /// Client-local event order for one extension session. + pub sequence_number: Option, + /// Capture payload schema version. + pub schema_version: Option, + /// Pseudonymous participant UUID supplied by the extension. + pub user_id: String, + /// Logical capture session UUID. + pub session_id: Option, + /// Origin of the event stream, such as the VS Code extension. + pub event_source: Option, + /// VS Code language identifier for the active file, when known. + pub language_id: Option, + /// Privacy-preserving SHA-256 hash of the local file path. + pub file_hash: Option, + /// Canonical type of the captured event. + pub event_type: CaptureEventType, + /// Server receive/record timestamp, in UTC. + pub timestamp: DateTime, + /// Optional client-observed event timestamp, in milliseconds since Unix + /// epoch. + pub client_timestamp_ms: Option, + /// Client timezone offset in minutes. + pub client_tz_offset_min: Option, + /// Event-specific payload, stored as JSON text in the DB. + pub data: serde_json::Value, +} + +impl CaptureEvent { + /// Convenience constructor when the caller already has a timestamp. + pub fn new( + user_id: String, + file_hash: Option, + event_type: CaptureEventType, + timestamp: DateTime, + data: serde_json::Value, + ) -> Self { + Self { + event_id: None, + sequence_number: None, + schema_version: None, + user_id, + session_id: None, + event_source: None, + language_id: None, + file_hash, + event_type, + timestamp, + client_timestamp_ms: None, + client_tz_offset_min: None, + data, + } + } + + /// Constructor for callers that already have first-class capture columns. + #[allow(clippy::too_many_arguments)] + pub fn with_columns( + event_id: Option, + sequence_number: Option, + schema_version: Option, + user_id: String, + session_id: Option, + event_source: Option, + language_id: Option, + file_hash: Option, + event_type: CaptureEventType, + timestamp: DateTime, + client_timestamp_ms: Option, + client_tz_offset_min: Option, + data: serde_json::Value, + ) -> Self { + Self { + event_id, + sequence_number, + schema_version, + user_id, + session_id, + event_source, + language_id, + file_hash, + event_type, + timestamp, + client_timestamp_ms, + client_tz_offset_min, + data, + } + } - // Create an event - let event = Event { - user_id: "user123".to_string(), - event_type: "keystroke".to_string(), - data: Some("Pressed key A".to_string()), - }; + /// Convenience constructor which uses the current time. + pub fn now( + user_id: String, + file_hash: Option, + event_type: CaptureEventType, + data: serde_json::Value, + ) -> Self { + Self::new(user_id, file_hash, event_type, Utc::now(), data) + } +} - // Insert the event into the database - event_capture.insert_event(event).await?; +/// Generate a server-side event ID for events classified after the original +/// extension message has been processed. +pub fn generate_capture_event_id(prefix: &str) -> String { + let counter = NEXT_CAPTURE_EVENT_ID.fetch_add(1, Ordering::Relaxed); + format!( + "{prefix}-{}-{}-{counter}", + process::id(), + Utc::now().timestamp_micros() + ) +} - Ok(()) -``` -} */ +/// Internal worker message. Identical to `CaptureEvent`, but separated in case +/// we later want to add batching / flush control signals. +type WorkerMsg = CaptureEvent; +/// Handle used by the rest of the server to record events. +/// +/// Cloning this handle is cheap: it only clones an `mpsc::UnboundedSender`. +#[derive(Clone)] pub struct EventCapture { - db_client: Arc>, + tx: mpsc::UnboundedSender, + status: Arc>, } -/* - ## The EventCapture Implementation -*/ - impl EventCapture { - /* - Creates a new `EventCapture` instance by reading the database connection parameters from the `config.json` file and connecting to the PostgreSQL database. - # Arguments - - config_path: The file path to the config.json file. - - # Returns - - A `Result` containing an `EventCapture` instance - */ - - pub async fn new>(config_path: P) -> Result { - // Read the configuration file - let config_content = fs::read_to_string(config_path).map_err(io::Error::other)?; - let config: Config = serde_json::from_str(&config_content) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - - // Build the connection string for the PostgreSQL database - let conn_str = format!( - "host={} user={} password={} dbname={}", - config.db_ip, config.db_user, config.db_password, config.db_name - ); + /// Create a new `EventCapture` instance and spawn a background worker which + /// consumes events and inserts them into PostgreSQL. + /// + /// This function is synchronous so it can be called from non-async server + /// setup code. It spawns an async task internally which performs the + /// database connection and event processing. + pub fn new(mut config: CaptureConfig) -> Result { + let fallback_path = config + .fallback_path + .get_or_insert_with(|| PathBuf::from("capture-events-fallback.jsonl")) + .clone(); + let conn_str = config.to_conn_str(); + let status = Arc::new(Mutex::new(CaptureStatus::starting(Some( + fallback_path.clone(), + )))); + // High-level DB connection details (no password). info!( - "Attempting Capture Database Connection. IP:[{}] Username:[{}] Database Name:[{}]", - config.db_ip, config.db_user, config.db_name + "Capture: preparing PostgreSQL connection ({})", + config.redacted_summary() + ); + + let (tx, mut rx) = mpsc::unbounded_channel::(); + let status_worker = status.clone(); + + // Create a dedicated runtime so capture can be started from sync code + // before the Actix/Tokio server runtime exists. + thread::Builder::new() + .name("codechat-capture".to_string()) + .spawn(move || { + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .expect("Capture: failed to build Tokio runtime"); + + runtime.block_on(async move { + info!("Capture: attempting to connect to PostgreSQL."); + + match tokio_postgres::connect(&conn_str, NoTls).await { + Ok((client, connection)) => { + info!("Capture: successfully connected to PostgreSQL."); + update_status(&status_worker, |status| { + status.state = CaptureState::Database; + status.last_error = None; + }); + + // Drive the connection in its own task. + let status_connection = status_worker.clone(); + tokio::spawn(async move { + if let Err(err) = connection.await { + error!("Capture PostgreSQL connection error: {err}"); + update_status(&status_connection, |status| { + status.state = CaptureState::Fallback; + status.last_error = Some(format!( + "PostgreSQL connection error: {err}" + )); + }); + } + }); + + // Main event loop: pull events off the channel and insert + // them into the database. + while let Some(event) = rx.recv().await { + debug!( + "Capture: inserting event: type={}, user_id={}, file_hash={:?}", + event.event_type, event.user_id, event.file_hash + ); + + if let Err(err) = insert_event(&client, &event).await { + error!( + "Capture: FAILED to insert event (type={}, user_id={}): {err}", + event.event_type, event.user_id + ); + update_status(&status_worker, |status| { + status.state = CaptureState::Fallback; + status.last_error = Some(format!( + "PostgreSQL insert failed: {err}" + )); + }); + write_event_to_fallback( + &fallback_path, + &event, + &status_worker, + Some(format!("PostgreSQL insert failed: {err}")), + ); + } else { + update_status(&status_worker, |status| { + status.persisted_events += 1; + if status.state != CaptureState::Database { + status.state = CaptureState::Database; + } + }); + debug!("Capture: event insert successful."); + } + } + + info!("Capture: event channel closed; background worker exiting."); + } + + Err(err) => { + let ctx = format!( + "Capture: FAILED to connect to PostgreSQL (host={}, dbname={}, user={})", + config.host, config.dbname, config.user + ); + + log_pg_connect_error(&ctx, &err); + + update_status(&status_worker, |status| { + status.state = CaptureState::Fallback; + status.last_error = Some(format!( + "PostgreSQL connection failed: {err}" + )); + }); + + warn!( + "Capture: writing pending events to fallback JSONL at {:?}.", + fallback_path + ); + while let Some(event) = rx.recv().await { + write_event_to_fallback( + &fallback_path, + &event, + &status_worker, + Some("PostgreSQL connection unavailable".to_string()), + ); + } + warn!("Capture: event channel closed; fallback worker exiting."); + } + } + }); + }) + .map_err(|err| { + io::Error::other(format!("Capture: failed to start worker thread: {err}")) + })?; + + Ok(Self { tx, status }) + } + + /// Enqueue an event for insertion. This is non-blocking. + pub fn log(&self, event: CaptureEvent) { + debug!( + "Capture: queueing event: type={}, user_id={}, file_hash={:?}", + event.event_type, event.user_id, event.file_hash + ); + + if let Err(err) = self.tx.send(event) { + error!("Capture: FAILED to enqueue capture event: {err}"); + update_status(&self.status, |status| { + status.failed_events += 1; + status.last_error = Some(format!("Failed to enqueue capture event: {err}")); + }); + } else { + update_status(&self.status, |status| { + status.queued_events += 1; + }); + } + } + + pub fn status(&self) -> CaptureStatus { + self.status + .lock() + .map(|status| status.clone()) + .unwrap_or_else(|_| { + let mut status = CaptureStatus::disabled(); + status.last_error = Some("Capture status lock is poisoned".to_string()); + status + }) + } +} + +fn update_status(status: &Arc>, f: impl FnOnce(&mut CaptureStatus)) { + match status.lock() { + Ok(mut guard) => f(&mut guard), + Err(err) => error!("Capture: unable to update status: {err}"), + } +} + +fn write_event_to_fallback( + fallback_path: &Path, + event: &CaptureEvent, + status: &Arc>, + last_error: Option, +) { + match append_fallback_event(fallback_path, event) { + Ok(()) => update_status(status, |status| { + status.fallback_events += 1; + status.last_error = last_error; + }), + Err(err) => { + error!( + "Capture: FAILED to write fallback event to {:?}: {err}", + fallback_path + ); + update_status(status, |status| { + status.failed_events += 1; + status.last_error = Some(format!("Fallback write failed: {err}")); + }); + } + } +} + +fn append_fallback_event(fallback_path: &Path, event: &CaptureEvent) -> io::Result<()> { + if let Some(parent) = fallback_path.parent() + && !parent.as_os_str().is_empty() + { + fs::create_dir_all(parent)?; + } + + let mut file = OpenOptions::new() + .create(true) + .append(true) + .open(fallback_path)?; + let record = serde_json::json!({ + "fallback_timestamp": Utc::now().to_rfc3339(), + "event": { + "event_id": event.event_id, + "sequence_number": event.sequence_number, + "schema_version": event.schema_version, + "user_id": event.user_id, + "session_id": event.session_id, + "event_source": event.event_source, + "language_id": event.language_id, + "file_hash": event.file_hash, + "event_type": event.event_type.as_str(), + "timestamp": event.timestamp.to_rfc3339(), + "client_timestamp_ms": event.client_timestamp_ms, + "client_tz_offset_min": event.client_tz_offset_min, + "data": event.data, + } + }); + writeln!(file, "{record}")?; + Ok(()) +} + +fn log_pg_connect_error(context: &str, err: &tokio_postgres::Error) { + // If Postgres returned a structured DbError, log it ONCE and bail. + if let Some(db) = err.as_db_error() { + // Example: 28P01 = invalid\_password + error!( + "{context}: PostgreSQL {} (SQLSTATE {})", + db.message(), + db.code().code() + ); + + if let Some(detail) = db.detail() { + error!("{context}: detail: {detail}"); + } + if let Some(hint) = db.hint() { + error!("{context}: hint: {hint}"); + } + return; + } + + // Otherwise, try to find an underlying std::io::Error (refused, timed out, + // DNS, etc.) + let mut current: &(dyn Error + 'static) = err; + while let Some(source) = current.source() { + if let Some(ioe) = source.downcast_ref::() { + error!( + "{context}: I/O error kind={:?} raw_os_error={:?} msg={}", + ioe.kind(), + ioe.raw_os_error(), + ioe + ); + return; + } + current = source; + } + + // Fallback: log once (Display) + error!("{context}: {err}"); +} + +fn should_retry_legacy_insert(err: &tokio_postgres::Error) -> bool { + matches!( + err.code().map(|code| code.code()), + Some("42703" | "42P01" | "42804") + ) +} + +/// Insert a single event into the `events` table. +async fn insert_event(client: &Client, event: &CaptureEvent) -> Result { + match insert_rich_event(client, event).await { + Ok(rows) => Ok(rows), + Err(err) if should_retry_legacy_insert(&err) => { + warn!( + "Capture: rich events insert failed against the current schema; retrying legacy insert: {err}" + ); + insert_legacy_event(client, event).await + } + Err(err) => Err(err), + } +} + +async fn insert_rich_event( + client: &Client, + event: &CaptureEvent, +) -> Result { + let timestamp = event.timestamp.to_rfc3339(); + let data_text = event.data.to_string(); + let event_type = event.event_type.as_str(); + + debug!( + "Capture: executing rich INSERT for user_id={}, event_type={}, timestamp={}", + event.user_id, event_type, timestamp + ); + + client + .execute( + "INSERT INTO events \ + (event_id, sequence_number, schema_version, \ + user_id, session_id, \ + event_source, language_id, file_hash, \ + event_type, timestamp, client_timestamp_ms, client_tz_offset_min, data) \ + VALUES \ + ($1, $2, $3, \ + $4, $5, \ + $6, $7, $8, \ + $9, $10::text::timestamptz, $11, $12, $13::text::jsonb)", + &[ + &event.event_id, + &event.sequence_number, + &event.schema_version, + &event.user_id, + &event.session_id, + &event.event_source, + &event.language_id, + &event.file_hash, + &event_type, + ×tamp, + &event.client_timestamp_ms, + &event.client_tz_offset_min, + &data_text, + ], + ) + .await +} + +async fn insert_legacy_event( + client: &Client, + event: &CaptureEvent, +) -> Result { + let timestamp = event.timestamp.to_rfc3339(); + let data_text = event.data.to_string(); + let event_type = event.event_type.as_str(); + + debug!( + "Capture: executing legacy INSERT for user_id={}, event_type={}, timestamp={}", + event.user_id, event_type, timestamp + ); + let file_path: Option = None; + + client + .execute( + "INSERT INTO events \ + (user_id, file_path, event_type, timestamp, data) \ + VALUES ($1, $2, $3, $4::text::timestamptz, $5::text::jsonb)", + &[ + &event.user_id, + &file_path, + &event_type, + ×tamp, + &data_text, + ], + ) + .await +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn capture_config_to_conn_str_is_well_formed() { + let cfg = CaptureConfig { + host: "localhost".to_string(), + port: Some(5432), + user: "alice".to_string(), + password: "secret".to_string(), + dbname: "codechat_capture".to_string(), + app_id: Some("spring25-study".to_string()), + fallback_path: Some(PathBuf::from("capture-events-fallback.jsonl")), + }; + + let conn = cfg.to_conn_str(); + // Very simple checks: we don't care about ordering beyond what we + // format. + assert!(conn.contains("host=localhost")); + assert!(conn.contains("user=alice")); + assert!(conn.contains("password=secret")); + assert!(conn.contains("dbname=codechat_capture")); + assert!(conn.contains("port=5432")); + assert!(!cfg.redacted_summary().contains("secret")); + } + + #[test] + fn capture_event_type_uses_stable_serialized_strings() { + assert_eq!( + serde_json::to_value(CaptureEventType::WriteDoc).unwrap(), + json!("write_doc") + ); + assert_eq!( + serde_json::from_value::(json!("compile_end")).unwrap(), + CaptureEventType::CompileEnd + ); + assert_eq!( + serde_json::to_value(CaptureEventType::CaptureSettingsChanged).unwrap(), + json!("capture_settings_changed") ); + assert!(serde_json::from_value::(json!("random")).is_err()); + } + + #[test] + fn capture_event_new_sets_all_fields() { + let ts = Utc::now(); - // Connect to the database asynchronously - let (client, connection) = tokio_postgres::connect(&conn_str, NoTls) - .await - .map_err(|e| io::Error::new(io::ErrorKind::ConnectionRefused, e))?; + let ev = CaptureEvent::new( + "user123".to_string(), + Some("hashed-path".to_string()), + CaptureEventType::WriteDoc, + ts, + json!({ "chars_typed": 42 }), + ); - // Spawn a task to manage the database connection in the background + assert_eq!(ev.user_id, "user123"); + assert_eq!(ev.file_hash.as_deref(), Some("hashed-path")); + assert_eq!(ev.event_type, CaptureEventType::WriteDoc); + assert_eq!(ev.timestamp, ts); + assert!(ev.event_id.is_none()); + assert_eq!(ev.data, json!({ "chars_typed": 42 })); + } + + #[test] + fn capture_event_now_uses_current_time_and_fields() { + let before = Utc::now(); + let ev = CaptureEvent::now( + "user123".to_string(), + None, + CaptureEventType::Save, + json!({ "reason": "manual" }), + ); + let after = Utc::now(); + + assert_eq!(ev.user_id, "user123"); + assert!(ev.file_hash.is_none()); + assert_eq!(ev.event_type, CaptureEventType::Save); + assert_eq!(ev.data, json!({ "reason": "manual" })); + + // Timestamp sanity check: it should be between before and after + assert!(ev.timestamp >= before); + assert!(ev.timestamp <= after); + } + + #[test] + fn capture_event_with_columns_sets_analysis_columns() { + let ts = Utc::now(); + + let ev = CaptureEvent::with_columns( + Some("abc-123".to_string()), + Some(42), + Some(2), + "user123".to_string(), + Some("session-1".to_string()), + Some("vscode_extension".to_string()), + Some("rust".to_string()), + Some("hash".to_string()), + CaptureEventType::WriteCode, + ts, + Some(ts.timestamp_millis() - 50), + Some(-360), + json!({ "chars_typed": 42 }), + ); + + assert_eq!(ev.event_id.as_deref(), Some("abc-123")); + assert_eq!(ev.sequence_number, Some(42)); + assert_eq!(ev.schema_version, Some(2)); + assert_eq!(ev.session_id.as_deref(), Some("session-1")); + assert_eq!(ev.event_source.as_deref(), Some("vscode_extension")); + assert_eq!(ev.language_id.as_deref(), Some("rust")); + assert_eq!(ev.file_hash.as_deref(), Some("hash")); + assert_eq!(ev.client_timestamp_ms, Some(ts.timestamp_millis() - 50)); + assert_eq!(ev.client_tz_offset_min, Some(-360)); + assert_eq!(ev.data, json!({ "chars_typed": 42 })); + } + + #[test] + fn capture_config_json_round_trip() { + let json_text = r#" + { + "host": "db.example.com", + "user": "bob", + "port": 5433, + "password": "hunter2", + "dbname": "cc_events", + "app_id": "fall25", + "fallback_path": "capture-events-fallback.jsonl" + } + "#; + + let cfg: CaptureConfig = serde_json::from_str(json_text).expect("JSON should parse"); + assert_eq!(cfg.host, "db.example.com"); + assert_eq!(cfg.port, Some(5433)); + assert_eq!(cfg.user, "bob"); + assert_eq!(cfg.password, "hunter2"); + assert_eq!(cfg.dbname, "cc_events"); + assert_eq!(cfg.app_id.as_deref(), Some("fall25")); + assert_eq!( + cfg.fallback_path.as_deref(), + Some(std::path::Path::new("capture-events-fallback.jsonl")) + ); + + // And it should serialize back to JSON without error + let _back = serde_json::to_string(&cfg).expect("Should serialize"); + } + + #[test] + fn capture_config_rejects_port_zero() { + let cfg = CaptureConfig { + host: "localhost".to_string(), + port: Some(0), + user: "alice".to_string(), + password: "secret".to_string(), + dbname: "codechat_capture".to_string(), + app_id: None, + fallback_path: None, + }; + + assert!(cfg.validate().is_err()); + } + + use std::fs; + //use tokio::time::{sleep, Duration}; + + /// Integration-style test: verify that EventCapture inserts into the rich + /// capture schema used by dissertation analysis. + /// + /// Reads connection parameters from the repo-root `capture_config.json`. + /// Logs the config and connection details via log4rs so you can confirm + /// what is used. + /// + /// Run this test with: + /// cargo test event\_capture\_inserts\_rich_schema\_event\_into\_db + /// -- --ignored --nocapture + /// + /// You must have a PostgreSQL database and a `capture_config.json` file + /// such as: { "host": "localhost", "user": "codechat\_test\_user", + /// "password": "codechat\_test\_password", "dbname": + /// "codechat\_capture\_test", "app\_id": "integration-test" } + #[tokio::test] + #[ignore] + async fn event_capture_inserts_rich_schema_event_into_db() + -> Result<(), Box> { + // Initialize logging for this test, using the same log4rs.yml as the + // server. If logging is already initialized, this will just return an + // error which we ignore. + let _ = log4rs::init_file("log4rs.yml", Default::default()); + + // 1. Load the capture configuration from file. + let cfg_text = fs::read_to_string("../capture_config.json") + .expect("capture_config.json must exist in the repo root for this test"); + let cfg: CaptureConfig = + serde_json::from_str(&cfg_text).expect("capture_config.json must be valid JSON"); + + log::info!( + "TEST: Loaded DB config from capture_config.json: host={}, user={}, dbname={}, app_id={:?}", + cfg.host, + cfg.user, + cfg.dbname, + cfg.app_id + ); + + // 2. Connect directly for setup + verification. + let conn_str = cfg.to_conn_str(); + log::info!("TEST: Attempting direct tokio_postgres connection for verification."); + + let (client, connection) = tokio_postgres::connect(&conn_str, NoTls).await?; tokio::spawn(async move { if let Err(e) = connection.await { - error!("Database connection error: [{e}]"); + log::error!("TEST: direct connection error: {e}"); } }); - info!( - "Connected to Database [{}] as User [{}]", - config.db_name, config.db_user + let required_columns = [ + "event_id", + "sequence_number", + "schema_version", + "session_id", + "event_source", + "language_id", + "file_hash", + "client_timestamp_ms", + "client_tz_offset_min", + ]; + for column in required_columns { + let row = client + .query_one( + r#" + SELECT data_type + FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'events' + AND column_name = $1 + "#, + &[&column], + ) + .await + .map_err(|err| { + format!( + "TEST SETUP ERROR: missing public.events.{column}; \ + run server/scripts/capture_events_schema.sql first: {err}" + ) + })?; + let data_type: String = row.get(0); + info!("TEST: public.events.{column} type={data_type}"); + } + + // 4. Start the EventCapture worker using the loaded config. + let capture = EventCapture::new(cfg.clone())?; + log::info!("TEST: EventCapture worker started."); + + // 5. Log a schema-v2 test event with all typed analysis metadata. + let test_suffix = Utc::now().timestamp_millis().to_string(); + let expected_event_id = format!("TEST_EVENT_{test_suffix}"); + let expected_user_id = format!("TEST_USER_{test_suffix}"); + let expected_session_id = format!("TEST_SESSION_{test_suffix}"); + let expected_file_hash = format!("TEST_FILE_HASH_{test_suffix}"); + let event_timestamp = Utc::now(); + let expected_client_timestamp_ms = event_timestamp.timestamp_millis() - 50; + let expected_data = json!({ + "chars_typed": 123, + "classification_basis": "integration_test" + }); + let event = CaptureEvent::with_columns( + Some(expected_event_id.clone()), + Some(42), + Some(2), + expected_user_id.clone(), + Some(expected_session_id.clone()), + Some("integration_test".to_string()), + Some("rust".to_string()), + Some(expected_file_hash.clone()), + CaptureEventType::WriteDoc, + event_timestamp, + Some(expected_client_timestamp_ms), + Some(360), + expected_data.clone(), ); - Ok(EventCapture { - db_client: Arc::new(Mutex::new(client)), - }) - } - - /* - Inserts an event into the database. - - # Arguments - - `event`: An `Event` instance containing the event data to insert. - - # Returns - A `Result` indicating success or containing a `tokio_postgres::Error`. - - # Example - #[tokio::main] - async fn main() -> Result<(), Box> { - let event_capture = EventCapture::new("config.json").await?; - - let event = Event { - user_id: "user123".to_string(), - event_type: "keystroke".to_string(), - data: Some("Pressed key A".to_string()), - }; - - event_capture.insert_event(event).await?; - Ok(()) - } - */ - - pub async fn insert_event(&self, event: Event) -> Result<(), io::Error> { - let current_time = Local::now(); - let formatted_time = current_time.to_rfc3339(); - - // SQL statement to insert the event into the 'events' table - let stmt = indoc! {" - INSERT INTO events (user_id, event_type, timestamp, data) - VALUES ($1, $2, $3, $4) - "}; - - // Acquire a lock on the database client for thread-safe access - let client = self.db_client.lock().await; - - // Execute the SQL statement with the event data - client - .execute( - stmt, - &[ - &event.user_id, - &event.event_type, - &formatted_time, - &event.data, - ], - ) - .await - .map_err(io::Error::other)?; - - info!("Event inserted into database: {event:?}"); + log::info!("TEST: logging a test capture event."); + capture.log(event); - Ok(()) - } -} + // 6. Wait (deterministically) for the background worker to insert the event, + // then fetch THAT row (instead of "latest row in the table"). + use tokio::time::{Duration, Instant, sleep}; -/* Database Schema (SQL DDL) + let deadline = Instant::now() + Duration::from_secs(2); -The following SQL statement creates the `events` table used by this library: + let row = loop { + match client + .query_one( + r#" + SELECT user_id, event_type, + event_id, sequence_number, schema_version, + session_id, event_source, language_id, file_hash, + client_timestamp_ms, client_tz_offset_min, data::text + FROM events + WHERE event_id = $1 + ORDER BY id DESC + LIMIT 1 + "#, + &[&expected_event_id], + ) + .await + { + Ok(row) => break row, // found it + Err(_) => { + if Instant::now() >= deadline { + return Err("Timed out waiting for EventCapture insert".into()); + } + sleep(Duration::from_millis(50)).await; + } + } + }; -CREATE TABLE events ( id SERIAL PRIMARY KEY, user_id TEXT NOT NULL, -event_type TEXT NOT NULL, timestamp TEXT NOT NULL, data TEXT ); + let user_id: String = row.get("user_id"); + let event_type: String = row.get(1); + let event_id: Option = row.get(2); + let sequence_number: Option = row.get(3); + let schema_version: Option = row.get(4); + let session_id: Option = row.get(5); + let event_source: Option = row.get(6); + let language_id: Option = row.get(7); + let file_hash: Option = row.get(8); + let client_timestamp_ms: Option = row.get(9); + let client_tz_offset_min: Option = row.get(10); + let data_text: String = row.get(11); + let data_value: serde_json::Value = serde_json::from_str(&data_text)?; -- **`id SERIAL PRIMARY KEY`**: Auto-incrementing primary key. -- **`user_id TEXT NOT NULL`**: The ID of the user associated with the event. -- **`event_type TEXT NOT NULL`**: The type of event. -- **`timestamp TEXT NOT NULL`**: The timestamp of the event. -- **`data TEXT`**: Optional additional data associated with the event. - **Note:** Ensure this table exists in your PostgreSQL database before using - the library. */ + assert_eq!(user_id, expected_user_id); + assert_eq!(event_type, CaptureEventType::WriteDoc.as_str()); + assert_eq!(event_id.as_deref(), Some(expected_event_id.as_str())); + assert_eq!(sequence_number, Some(42)); + assert_eq!(schema_version, Some(2)); + assert_eq!(session_id.as_deref(), Some(expected_session_id.as_str())); + assert_eq!(event_source.as_deref(), Some("integration_test")); + assert_eq!(language_id.as_deref(), Some("rust")); + assert_eq!(file_hash.as_deref(), Some(expected_file_hash.as_str())); + assert_eq!(client_timestamp_ms, Some(expected_client_timestamp_ms)); + assert_eq!(client_tz_offset_min, Some(360)); + assert_eq!(data_value, expected_data); + + log::info!("✅ TEST: EventCapture integration test succeeded and wrote to database."); + Ok(()) + } +} diff --git a/server/src/ide.rs b/server/src/ide.rs index 67d139de..aab9e932 100644 --- a/server/src/ide.rs +++ b/server/src/ide.rs @@ -93,6 +93,7 @@ async fn start_server( // Provide a class to start and stop the server. All its fields are opaque, // since only Rust should use them. pub struct CodeChatEditorServer { + app_state: WebAppState, server_handle: ServerHandle, from_ide_tx: Sender, to_ide_rx: Arc>>, @@ -141,6 +142,7 @@ impl CodeChatEditorServer { let (expired_messages_tx, expired_messages_rx) = mpsc::channel(100); Ok(CodeChatEditorServer { + app_state, server_handle, from_ide_tx: websocket_queues.from_websocket_tx, to_ide_rx: Arc::new(Mutex::new(websocket_queues.to_websocket_rx)), @@ -251,6 +253,18 @@ impl CodeChatEditorServer { .await } + pub async fn send_capture_event( + &self, + capture_event: webserver::CaptureEventWire, + ) -> std::io::Result { + self.send_message_timeout(EditorMessageContents::Capture(Box::new(capture_event))) + .await + } + + pub fn capture_status(&self) -> crate::capture::CaptureStatus { + webserver::capture_status(&self.app_state) + } + // Send a `CurrentFile` message. The other parameter (true if text/false if // binary/None if ignored) is ignored by the server, so it's always sent as // `None`. diff --git a/server/src/ide/filewatcher.rs b/server/src/ide/filewatcher.rs index 308056cc..3a524c8a 100644 --- a/server/src/ide/filewatcher.rs +++ b/server/src/ide/filewatcher.rs @@ -674,6 +674,7 @@ async fn processing_task( EditorMessageContents::Opened(_) | EditorMessageContents::OpenUrl(_) | + EditorMessageContents::Capture(_) | EditorMessageContents::ClientHtml(_) | EditorMessageContents::RequestClose => { let err = ResultErrTypes::ClientIllegalMessage; diff --git a/server/src/main.rs b/server/src/main.rs index bc443035..f98d4c96 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -332,10 +332,15 @@ fn port_in_range(s: &str) -> Result { fn parse_credentials(s: &str) -> Result { // For simplicity, require a username to have no colons. - let split: Vec<_> = s.splitn(2, ":").collect(); + let Some((username, password)) = s.split_once(':') else { + return Err("auth must use the form username:password".to_string()); + }; + if username.is_empty() { + return Err("auth username may not be empty".to_string()); + } Ok(Credentials { - username: split[0].to_string(), - password: split[1].to_string(), + username: username.to_string(), + password: password.to_string(), }) } diff --git a/server/src/translation.rs b/server/src/translation.rs index e87cf978..1e2288c0 100644 --- a/server/src/translation.rs +++ b/server/src/translation.rs @@ -222,6 +222,7 @@ use tokio::{ // ### Local use crate::{ + capture::{CaptureEventType, hash_capture_path}, lexer::{CodeDocBlock, DocBlock, supported_languages::MARKDOWN_MODE}, processing::{ CodeChatForWeb, CodeMirror, CodeMirrorDiff, CodeMirrorDiffable, CodeMirrorDocBlock, @@ -232,11 +233,11 @@ use crate::{ }, queue_send, queue_send_func, webserver::{ - CursorPosition, EditorMessage, EditorMessageContents, INITIAL_MESSAGE_ID, + CaptureEventWire, CursorPosition, EditorMessage, EditorMessageContents, INITIAL_MESSAGE_ID, MESSAGE_ID_INCREMENT, ProcessingTaskHttpRequest, ProcessingTaskHttpRequestFlags, ResultErrTypes, ResultOkTypes, SimpleHttpResponse, SimpleHttpResponseError, - UpdateMessageContents, WebAppState, WebsocketQueues, file_to_response, path_to_url, - send_response, try_canonicalize, try_read_as_text, url_to_path, + UpdateMessageContents, WebAppState, WebsocketQueues, file_to_response, log_capture_event, + path_to_url, send_response, try_canonicalize, try_read_as_text, url_to_path, }, }; @@ -387,6 +388,7 @@ pub fn create_translation_queues( /// allows factoring out lengthy contents in the loop into subfunctions. struct TranslationTask { // These parameters are passed to us. + app_state: WebAppState, connection_id_raw: String, prefix: &'static [&'static str], allow_source_diffs: bool, @@ -435,6 +437,138 @@ struct TranslationTask { /// Has the full (non-diff) version of the current file been sent? Don't /// send diffs until this is sent. sent_full: bool, + /// Most recent capture metadata supplied by the IDE. Server-generated + /// capture events reuse this so translated write events retain the same + /// participant/session identity as the extension events. + capture_context: CaptureContext, +} + +/// Participant and session metadata remembered from client capture events. +/// +/// The translation layer generates `write_doc`/`write_code` events after it +/// has parsed CodeChat content. Those events should share the same pseudonymous +/// participant and capture session as the extension-side events, but the server +/// should not ask students for course/group/assignment/task setup values. +#[derive(Clone, Debug, Default)] +struct CaptureContext { + /// True only while capture is actively recording. The translation layer + /// must not generate write events from a stale participant/session context + /// after recording or consent is turned off. + active: bool, + /// Pseudonymous participant UUID from the latest client capture event. + user_id: Option, + /// Origin of the client event stream, such as the VS Code extension. + event_source: Option, + /// Extension session UUID carried on the capture wire payload. + session_id: Option, + /// Client timezone offset in minutes, retained for generated write events. + client_tz_offset_min: Option, + /// Capture payload schema version from the extension. + schema_version: Option, +} + +impl CaptureContext { + /// Refresh server-side capture identity and active/inactive state from an + /// extension capture message. This context is used only for server-generated + /// write classification events, not for deciding whether the original + /// extension event itself is inserted. + fn update_from_wire(&mut self, wire: &CaptureEventWire) { + // Session start/end are the coarse lifecycle signals; the explicit + // `capture_active` data field handles settings-change audit events that + // should be inserted while also disabling later translated writes. + match wire.event_type { + CaptureEventType::SessionStart => self.active = true, + CaptureEventType::SessionEnd => self.active = false, + _ => {} + } + // Keep the most recent participant/session metadata so translated write + // events can be joined to the same participant as extension events. + if !wire.user_id.trim().is_empty() { + self.user_id = Some(wire.user_id.clone()); + } + if let Some(event_source) = &wire.event_source { + self.event_source = Some(event_source.clone()); + } + if let Some(session_id) = &wire.session_id { + self.session_id = Some(session_id.clone()); + } + if let Some(schema_version) = wire.schema_version { + self.schema_version = Some(schema_version); + } + if let Some(client_tz_offset_min) = wire.client_tz_offset_min { + self.client_tz_offset_min = Some(client_tz_offset_min); + } + if let Some(serde_json::Value::Object(data)) = &wire.data { + // Settings-change audit events use this flag to tell the server + // whether future translation-generated write events are allowed. + if let Some(active) = data + .get("capture_active") + .and_then(serde_json::Value::as_bool) + { + self.active = active; + } + // Support older wire payloads that stored the session in `data`. + if let Some(session_id) = data.get("session_id").and_then(serde_json::Value::as_str) { + self.session_id = Some(session_id.to_string()); + } + } + } + + fn capture_event( + &self, + event_type: CaptureEventType, + file_path: Option, + data: serde_json::Value, + ) -> Option { + // Do not generate server-side write_doc/write_code rows unless the + // latest settings state says capture is actively recording. + if !self.active { + return None; + } + // Normalize arbitrary JSON payloads into objects so we can attach + // server-translation metadata consistently. + let mut data = match data { + serde_json::Value::Object(map) => map, + other => { + let mut map = serde_json::Map::new(); + map.insert("value".to_string(), other); + map + } + }; + // Preserve any existing source field, but default server-generated + // events to `server_translation` for analysis. + data.entry("source".to_string()) + .or_insert_with(|| serde_json::json!("server_translation")); + + Some(CaptureEventWire { + event_id: None, + sequence_number: None, + schema_version: self.schema_version, + user_id: self.user_id.clone()?, + session_id: self.session_id.clone(), + event_source: self.event_source.clone(), + language_id: None, + file_hash: file_path.as_deref().map(hash_capture_path), + event_type, + client_timestamp_ms: None, + client_tz_offset_min: self.client_tz_offset_min, + data: Some(serde_json::Value::Object(data)), + }) + } +} + +/// True for a capture message that should update `CaptureContext` only. These +/// messages are used to stop server-side write classification after the user +/// turns off consent or recording, without adding a synthetic DB row. +fn capture_control_only(wire: &CaptureEventWire) -> bool { + matches!( + &wire.data, + Some(serde_json::Value::Object(data)) + if data + .get("capture_control_only") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false) + ) } /// This is the processing task for the Visual Studio Code IDE. It handles all @@ -466,6 +600,7 @@ pub async fn translation_task( let mut continue_loop = true; let mut tt = TranslationTask { + app_state: app_state.clone(), connection_id_raw, prefix, allow_source_diffs, @@ -489,6 +624,7 @@ pub async fn translation_task( version: 0.0, // Don't send diffs until this is sent. sent_full: false, + capture_context: CaptureContext::default(), }; while continue_loop { select! { @@ -515,6 +651,19 @@ pub async fn translation_task( EditorMessageContents::Result(_) => continue_loop = tt.ide_result(ide_message).await, EditorMessageContents::Update(_) => continue_loop = tt.ide_update(ide_message).await, + EditorMessageContents::Capture(capture_event) => { + // Capture messages affect both DB storage and the + // translation-layer context used for future + // server-classified write events. + let control_only = capture_control_only(&capture_event); + tt.capture_context.update_from_wire(&capture_event); + if control_only { + debug!("Updated capture context from control-only IDE event."); + } else { + log_capture_event(&app_state, *capture_event); + } + send_response(&tt.to_ide_tx, ide_message.id, Ok(ResultOkTypes::Void)).await; + }, // Update the current file; translate it to a URL then // pass it to the Client. @@ -610,6 +759,18 @@ pub async fn translation_task( }, EditorMessageContents::Update(_) => continue_loop = tt.client_update(client_message).await, + EditorMessageContents::Capture(capture_event) => { + // Same capture handling as IDE messages: update the + // context first, then store only non-control events. + let control_only = capture_control_only(&capture_event); + tt.capture_context.update_from_wire(&capture_event); + if control_only { + debug!("Updated capture context from control-only Client event."); + } else { + log_capture_event(&app_state, *capture_event); + } + send_response(&tt.to_client_tx, client_message.id, Ok(ResultOkTypes::Void)).await; + }, // Update the current file; translate it to a URL then // pass it to the IDE. @@ -700,6 +861,103 @@ pub async fn translation_task( // These provide translation for messages passing through the Server. impl TranslationTask { + fn capture_file_path(file_path: &std::path::Path) -> Option { + file_path.to_str().map(str::to_string) + } + + fn log_server_capture_event( + &self, + event_type: CaptureEventType, + file_path: &std::path::Path, + data: serde_json::Value, + ) { + let Some(capture_event) = self.capture_context.capture_event( + event_type, + Self::capture_file_path(file_path), + data, + ) else { + debug!("Skipping server-classified capture event; capture identity is not known yet."); + return; + }; + log_capture_event(&self.app_state, capture_event); + } + + fn log_raw_write_event(&self, file_path: &std::path::Path, before: &str, after: &str) { + if before == after { + return; + } + self.log_server_capture_event( + CaptureEventType::WriteCode, + file_path, + serde_json::json!({ + "source": "server_translation", + "classification_basis": "raw_text", + "diff": diff_str(before, after), + }), + ); + } + + fn log_code_mirror_write_events( + &self, + file_path: &std::path::Path, + metadata: &SourceFileMetadata, + before_doc: &str, + before_doc_blocks: Option<&CodeMirrorDocBlockVec>, + after: &CodeMirror, + source: &str, + ) { + if metadata.mode == MARKDOWN_MODE { + if !compare_html(before_doc, &after.doc) { + self.log_server_capture_event( + CaptureEventType::WriteDoc, + file_path, + serde_json::json!({ + "source": source, + "classification_basis": "markdown_document", + "mode": metadata.mode, + "diff": diff_str(before_doc, &after.doc), + }), + ); + } + return; + } + + if before_doc != after.doc { + self.log_server_capture_event( + CaptureEventType::WriteCode, + file_path, + serde_json::json!({ + "source": source, + "classification_basis": "codemirror_code_text", + "mode": metadata.mode, + "diff": diff_str(before_doc, &after.doc), + }), + ); + } + + let doc_blocks_changed = match before_doc_blocks { + Some(before) => !doc_blocks_compare(before, &after.doc_blocks), + None => !after.doc_blocks.is_empty(), + }; + if doc_blocks_changed { + let doc_block_diff = before_doc_blocks.map(|before| { + serde_json::json!(diff_code_mirror_doc_blocks(before, &after.doc_blocks)) + }); + self.log_server_capture_event( + CaptureEventType::WriteDoc, + file_path, + serde_json::json!({ + "source": source, + "classification_basis": "codemirror_doc_blocks", + "mode": metadata.mode, + "doc_block_count_before": before_doc_blocks.map_or(0, Vec::len), + "doc_block_count_after": after.doc_blocks.len(), + "doc_block_diff": doc_block_diff, + }), + ); + } + } + // Pass a `Result` message to the Client, unless it's a `LoadFile` result. async fn ide_result(&mut self, ide_message: EditorMessage) -> bool { let EditorMessageContents::Result(ref result) = ide_message.message else { @@ -895,6 +1153,16 @@ impl TranslationTask { else { panic!("Unexpected diff value."); }; + if self.sent_full { + self.log_code_mirror_write_events( + &clean_file_path, + &ccfw.metadata, + &self.code_mirror_doc, + self.code_mirror_doc_blocks.as_ref(), + code_mirror_translated, + "ide", + ); + } // Send a diff if possible. let client_contents = if self.sent_full { self.diff_code_mirror( @@ -940,6 +1208,13 @@ impl TranslationTask { Err(ResultErrTypes::TodoBinarySupport) } TranslationResultsString::Unknown => { + if self.sent_full { + self.log_raw_write_event( + &clean_file_path, + &self.source_code, + &code_mirror.doc, + ); + } // Send the new raw contents. debug!("Sending translated contents to Client."); queue_send_func!(self.to_client_tx.send(EditorMessage { @@ -956,13 +1231,16 @@ impl TranslationTask { mode: "".to_string(), }, source: CodeMirrorDiffable::Plain(CodeMirror { - doc: code_mirror.doc, + doc: code_mirror.doc.clone(), doc_blocks: vec![] }), version: contents.version }), }), })); + self.source_code = code_mirror.doc; + self.code_mirror_doc = self.source_code.clone(); + self.code_mirror_doc_blocks = Some(vec![]); Ok(ResultOkTypes::Void) } TranslationResultsString::Toc(_) => { @@ -1045,12 +1323,22 @@ impl TranslationTask { // what we just received. This must be updated // before we can translate back to check for changes // (the next step). - let CodeMirrorDiffable::Plain(code_mirror) = cfw.source else { + let CodeMirrorDiffable::Plain(ref code_mirror) = cfw.source else { // TODO: support diffable! panic!("Diff not supported."); }; - self.code_mirror_doc = code_mirror.doc; - self.code_mirror_doc_blocks = Some(code_mirror.doc_blocks); + if self.sent_full { + self.log_code_mirror_write_events( + &clean_file_path, + &cfw.metadata, + &self.code_mirror_doc, + self.code_mirror_doc_blocks.as_ref(), + code_mirror, + "client", + ); + } + self.code_mirror_doc = code_mirror.doc.clone(); + self.code_mirror_doc_blocks = Some(code_mirror.doc_blocks.clone()); // We may need to change this version if we send a // diff back to the Client. let mut cfw_version = cfw.version; @@ -1400,7 +1688,88 @@ fn debug_shorten(val: T) -> String { // ----- #[cfg(test)] mod tests { - use crate::{processing::CodeMirrorDocBlock, translation::doc_blocks_compare}; + use crate::{ + capture::CaptureEventType, + processing::CodeMirrorDocBlock, + translation::{CaptureContext, capture_control_only, doc_blocks_compare}, + webserver::CaptureEventWire, + }; + + fn capture_wire( + event_type: crate::capture::CaptureEventType, + data: serde_json::Value, + ) -> CaptureEventWire { + // Minimal test helper for feeding lifecycle/control messages into the + // translation-layer capture context. + CaptureEventWire { + event_id: None, + sequence_number: None, + schema_version: Some(2), + user_id: "participant".to_string(), + session_id: None, + event_source: Some("vscode_extension".to_string()), + language_id: None, + file_hash: None, + event_type, + client_timestamp_ms: None, + client_tz_offset_min: Some(360), + data: Some(data), + } + } + + #[test] + fn capture_context_only_generates_events_while_active() { + let mut context = CaptureContext::default(); + // Without an active capture session, translated writes must be skipped. + assert!( + context + .capture_event(CaptureEventType::WriteCode, None, serde_json::json!({})) + .is_none() + ); + + context.update_from_wire(&capture_wire( + CaptureEventType::SessionStart, + serde_json::json!({ + "session_id": "session", + "capture_active": true, + }), + )); + // A session_start activates server-side translated write capture. + assert!( + context + .capture_event(CaptureEventType::WriteCode, None, serde_json::json!({})) + .is_some() + ); + + context.update_from_wire(&capture_wire( + CaptureEventType::SessionEnd, + serde_json::json!({ + "capture_active": false, + }), + )); + // A session_end deactivates translated write capture so stale context + // cannot continue inserting DB rows. + assert!( + context + .capture_event(CaptureEventType::WriteCode, None, serde_json::json!({})) + .is_none() + ); + } + + #[test] + fn capture_control_only_is_detected_from_data() { + // Control-only events are the extension's way to update server capture + // state without storing the stop signal as a normal event row. + let wire = capture_wire( + CaptureEventType::SessionEnd, + serde_json::json!({ + "capture_active": false, + "capture_control_only": true, + }), + ); + + assert!(capture_control_only(&wire)); + } #[test] fn test_x1() { diff --git a/server/src/webserver.rs b/server/src/webserver.rs index 59df0f54..b678cda4 100644 --- a/server/src/webserver.rs +++ b/server/src/webserver.rs @@ -38,6 +38,7 @@ use std::{ // ### Third-party use actix_files; + use actix_web::{ App, HttpRequest, HttpResponse, HttpServer, dev::{Server, ServerHandle, ServiceFactory, ServiceRequest}, @@ -47,6 +48,7 @@ use actix_web::{ middleware, web::{self, Data}, }; + use actix_web_httpauth::{extractors::basic::BasicAuth, middleware::HttpAuthentication}; use actix_ws::AggregatedMessage; use bytes::Bytes; @@ -95,6 +97,13 @@ use crate::{ }, }; +use crate::capture::{ + CaptureEvent, CaptureEventType, CaptureStatus, EventCapture, generate_capture_event_id, + load_capture_config, +}; + +use chrono::Utc; + // Data structures // --------------- // @@ -201,6 +210,8 @@ pub enum EditorMessageContents { // Server will determine the value if needed. Option, ), + /// Record an instrumentation event. Valid destinations: Server. + Capture(Box), // #### These messages may only be sent by the IDE. /// This is the first message sent when the IDE starts up. It may only be @@ -405,6 +416,8 @@ pub struct AppState { pub connection_id: Mutex>, /// The auth credentials if authentication is used. credentials: Option, + // Added to support capture - JDS - 11/2025 + pub capture: Option, } pub type WebAppState = web::Data; @@ -415,6 +428,56 @@ pub struct Credentials { pub password: String, } +/// JSON payload received from clients for capture events. +/// +/// The server supplies the authoritative timestamp. Study metadata such as +/// course, assignment, group, condition, and task is not part of this wire type: +/// those values are inferred later from researcher-managed mappings keyed by +/// the pseudonymous `user_id` and event timestamps. +#[derive(Debug, Serialize, Deserialize, PartialEq, TS)] +#[ts(export, optional_fields)] +pub struct CaptureEventWire { + /// Client-generated unique event identifier. + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, + /// Client-local event order for one extension session. + #[serde(skip_serializing_if = "Option::is_none")] + pub sequence_number: Option, + /// Capture payload schema version. + #[serde(skip_serializing_if = "Option::is_none")] + pub schema_version: Option, + /// Pseudonymous participant UUID. This is not the student's real identity. + pub user_id: String, + /// Logical capture session UUID. + #[serde(skip_serializing_if = "Option::is_none")] + pub session_id: Option, + /// Source of this event, such as the VS Code extension or server translation. + #[serde(skip_serializing_if = "Option::is_none")] + pub event_source: Option, + /// VS Code language identifier for the active file, when known. + #[serde(skip_serializing_if = "Option::is_none")] + pub language_id: Option, + /// SHA-256 hash of the local file path. Raw local paths are intentionally + /// not accepted on the capture wire. + #[serde(skip_serializing_if = "Option::is_none")] + pub file_hash: Option, + /// Canonical capture event type. + pub event_type: CaptureEventType, + + /// Optional client-side timestamp (milliseconds since Unix epoch). + #[serde(skip_serializing_if = "Option::is_none")] + pub client_timestamp_ms: Option, + + /// Optional client timezone offset in minutes (JS Date().getTimezoneOffset()). + #[serde(skip_serializing_if = "Option::is_none")] + pub client_tz_offset_min: Option, + + /// Arbitrary event-specific data stored as JSON (optional). + #[serde(skip_serializing_if = "Option::is_none")] + #[ts(type = "unknown")] + pub data: Option, +} + // Macros // ------ /// Create a macro to report an error when enqueueing an item. @@ -449,7 +512,7 @@ macro_rules! queue_send_func { // The timeout for a reply from a websocket, in ms. Use a short timeout to speed // up unit tests. pub const REPLY_TIMEOUT_MS: Duration = if cfg!(test) { - Duration::from_millis(500) + Duration::from_millis(2500) } else { Duration::from_millis(15000) }; @@ -603,6 +666,53 @@ async fn stop(app_state: WebAppState) -> HttpResponse { HttpResponse::NoContent().finish() } +/// Log a capture event if capture is enabled. +pub fn log_capture_event(app_state: &WebAppState, wire: CaptureEventWire) -> CaptureStatus { + if let Some(capture) = &app_state.capture { + let server_timestamp = Utc::now(); + // Default missing data to empty object + let data = wire.data.unwrap_or_else(|| serde_json::json!({})); + + let data = if data.is_object() { + data + } else { + serde_json::json!({ "value": data }) + }; + + let event = CaptureEvent::with_columns( + Some( + wire.event_id + .unwrap_or_else(|| generate_capture_event_id("server")), + ), + wire.sequence_number, + wire.schema_version, + wire.user_id, + wire.session_id, + wire.event_source, + wire.language_id, + wire.file_hash, + wire.event_type, + server_timestamp, + wire.client_timestamp_ms, + wire.client_tz_offset_min, + data, + ); + + capture.log(event); + capture.status() + } else { + CaptureStatus::disabled() + } +} + +pub fn capture_status(app_state: &WebAppState) -> CaptureStatus { + app_state + .capture + .as_ref() + .map(EventCapture::status) + .unwrap_or_else(CaptureStatus::disabled) +} + // Get the `mode` query parameter to determine `is_test_mode`; default to // `false`. pub fn get_test_mode(req: &HttpRequest) -> bool { @@ -1451,9 +1561,6 @@ pub fn setup_server( addr: &SocketAddr, credentials: Option, ) -> std::io::Result<(Server, Data)> { - // Connect to the Capture Database - //let _event_capture = EventCapture::new("config.json").await?; - // Pre-load the bundled files before starting the webserver. let _ = &*BUNDLED_FILES_MAP; let app_data = make_app_data(credentials); @@ -1529,6 +1636,22 @@ pub fn configure_logger(level: LevelFilter) -> Result<(), Box) -> WebAppState { + // Initialize event capture from a config file (optional). + let root_path = ROOT_PATH.lock().unwrap().clone(); + let capture: Option = load_capture_config(&root_path).and_then(|cfg| { + let summary = cfg.redacted_summary(); + match EventCapture::new(cfg) { + Ok(ec) => { + info!("Capture: enabled ({summary})"); + Some(ec) + } + Err(err) => { + warn!("Capture: failed to initialize ({summary}): {err}"); + None + } + } + }); + web::Data::new(AppState { server_handle: Mutex::new(None), filewatcher_next_connection_id: Mutex::new(0), @@ -1539,6 +1662,7 @@ pub fn make_app_data(credentials: Option) -> WebAppState { client_queues: Arc::new(Mutex::new(HashMap::new())), connection_id: Mutex::new(HashSet::new()), credentials, + capture, }) } diff --git a/server/tests/overall_1.rs b/server/tests/overall_1.rs index 61490f7c..a4a05497 100644 --- a/server/tests/overall_1.rs +++ b/server/tests/overall_1.rs @@ -33,7 +33,7 @@ use std::{error::Error, path::PathBuf, time::Duration}; use dunce::canonicalize; use indoc::indoc; use pretty_assertions::assert_eq; -use thirtyfour::{By, Key, WebDriver, error::WebDriverError}; +use thirtyfour::{By, Key, WebDriver, error::WebDriverError, prelude::ElementQueryable}; use tokio::time::sleep; // ### Local @@ -784,39 +784,52 @@ async fn test_client_updates_core( // Target the iframe containing the Client. select_codechat_iframe(&driver).await; - // Select the doc block and add to the line, causing a word wrap. + // Focus the doc block, then wait for the async handoff to the shared inline + // TinyMCE editor before typing. Otherwise WebDriver can type into the + // transient contenteditable div, moving the cursor without marking the doc + // block dirty on macOS Chrome. let contents_css = ".CodeChat-CodeMirror .CodeChat-doc-contents"; let doc_block_contents = driver.find(By::Css(contents_css)).await.unwrap(); + doc_block_contents.click().await.unwrap(); + let doc_block_contents = driver + .query(By::Css( + ".CodeChat-CodeMirror #TinyMCE-inst:not(.CodeChat-doc-hidden)", + )) + .first() + .await + .unwrap(); + + // Add to the line, causing a word wrap. doc_block_contents - .send_keys("" + Key::End + " testing") + .send_keys(Key::End + " testing") .await .unwrap(); - // Get the next message, which could be a cursor update followed by a text + // Get the next message, which could be cursor updates followed by a text // update, or just the text update. let mut client_id = INITIAL_CLIENT_MESSAGE_ID; - let mut msg = codechat_server.get_message_timeout(TIMEOUT).await.unwrap(); - if let EditorMessageContents::Update(ref update) = msg.message + let mut msg = codechat_server + .get_message_timeout(TIMEOUT) + .await + .expect("expected client update after editing doc block"); + while let EditorMessageContents::Update(ref update) = msg.message && update.contents.is_none() { - // Sometimes, we get just a cursor update. If so, verify this then wait - // for the text update. - assert_eq!( - msg, - EditorMessage { - id: client_id, - message: EditorMessageContents::Update(UpdateMessageContents { - file_path: path_str.clone(), - cursor_position: Some(CursorPosition::Line(1)), - scroll_position: Some(1.0), - is_re_translation: false, - contents: None, - }) - } + // Sometimes, we get cursor-only updates. If so, verify and acknowledge + // them, then keep waiting for the text update. + assert_eq!(msg.id, client_id); + assert_eq!(update.file_path, path_str); + assert!(!update.is_re_translation); + assert!( + update.cursor_position.is_some() || update.scroll_position.is_some(), + "cursor-only update must include cursor or scroll state: {msg:#?}", ); + codechat_server.send_result(client_id, None).await.unwrap(); client_id += MESSAGE_ID_INCREMENT; - assert_eq!(client_id, 7.0); - msg = codechat_server.get_message_timeout(TIMEOUT).await.unwrap(); + msg = codechat_server + .get_message_timeout(TIMEOUT) + .await + .expect("expected content update after cursor-only update"); } // Verify the updated text. @@ -850,7 +863,7 @@ async fn test_client_updates_core( ); codechat_server.send_result(client_id, None).await.unwrap(); client_id += MESSAGE_ID_INCREMENT; - assert!(client_id == 10.0 || client_id == 7.0); + assert!(client_id >= 7.0); // The Server sends the Client a wrapped version of the text; the Client // replies with a Result(Ok). @@ -863,7 +876,6 @@ async fn test_client_updates_core( ); server_id += MESSAGE_ID_INCREMENT; - // After this, ID is 13. goto_line(&codechat_server, &driver, &mut client_id, &path_str, 4) .await .unwrap(); diff --git a/server/tests/overall_common/mod.rs b/server/tests/overall_common/mod.rs index 035eeeda..ec5e602a 100644 --- a/server/tests/overall_common/mod.rs +++ b/server/tests/overall_common/mod.rs @@ -145,8 +145,14 @@ impl ExpectedMessages { } } -// Time to wait for `ExpectedMessages`. -pub const TIMEOUT: Duration = Duration::from_millis(3000); +// Time to wait for browser/WebDriver-backed client-server messages. This +// matches the client-side response window and gives CI enough room for autosave +// and loadfile acknowledgements under matrix load. +pub const TIMEOUT: Duration = Duration::from_millis(15000); + +// Browser-backed tests share a single WebDriver endpoint. Safari on macOS CI is +// unreliable with overlapping sessions, so serialize the harness. +pub(crate) static WEB_DRIVER_TEST_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(()); // ### Test harness // @@ -161,6 +167,7 @@ pub async fn harness< // The output from calling `prep_test_dir!()`. prep_test_dir: (TempDir, PathBuf), ) -> Result<(), Box> { + let _webdriver_test_lock = WEB_DRIVER_TEST_LOCK.lock().await; // Send log events to the tracing subscriber, since the code currently uses // a log-based framework. As below, ignore re-initialization errors. let _ = LogTracer::init(); @@ -237,11 +244,10 @@ pub async fn harness< // Report any errors produced when removing the temporary directory. temp_dir.close()?; - ret.unwrap_or_else(|err| - // Convert a panic to an error. - Err::<(), Box>(Box::from(format!( - "{err:#?}" - )))) + ret.unwrap_or_else( + // Convert a panic to an error. + |err| Err::<(), Box>(Box::from(format!("{err:#?}"))), + ) } #[macro_export]