From d376a25461418ec7c8de4a264b0bc8522ad019e6 Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Thu, 14 May 2026 01:51:34 +0000 Subject: [PATCH 1/2] refactor: update node type registry for per-field _updated_at timestamps Part of constructive-planning#849 - SearchVector: include_stale_field -> include_updated_at_field, descriptions updated for {field_name}_updated_at timestamptz - DataFileEmbedding: same parameter rename and description updates - DataImageEmbedding: same parameter rename and description updates - All stale_strategy descriptions updated to reflect new field naming --- .../src/data/data-file-embedding.ts | 9 ++++++--- .../src/data/data-image-embedding.ts | 11 ++++++++--- packages/node-type-registry/src/data/search-vector.ts | 6 +++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/packages/node-type-registry/src/data/data-file-embedding.ts b/packages/node-type-registry/src/data/data-file-embedding.ts index f0ba5eb7d..a95752014 100644 --- a/packages/node-type-registry/src/data/data-file-embedding.ts +++ b/packages/node-type-registry/src/data/data-file-embedding.ts @@ -185,12 +185,15 @@ export const DataFileEmbedding: NodeTypeDefinition = { enum: ['column', 'null', 'hash'], description: 'Strategy for tracking embedding staleness when extraction is enabled. ' + - 'column: embedding_stale boolean. null: set embedding to NULL. hash: md5 hash.', + 'column: {field_name}_updated_at timestamptz (NULL = needs computation). ' + + 'null: set embedding to NULL. hash: {field_name}_source_hash md5.', default: 'column' }, - include_stale_field: { + include_updated_at_field: { type: 'boolean', - description: 'Whether to include the embedding_stale boolean field (extract mode)', + description: + 'Whether to include the {field_name}_updated_at timestamptz column ' + + '(read-only in GraphQL, set by worker on completion). Extract mode only.', default: true } } diff --git a/packages/node-type-registry/src/data/data-image-embedding.ts b/packages/node-type-registry/src/data/data-image-embedding.ts index e552aed4e..3d6bc9a9f 100644 --- a/packages/node-type-registry/src/data/data-image-embedding.ts +++ b/packages/node-type-registry/src/data/data-image-embedding.ts @@ -156,12 +156,17 @@ export const DataImageEmbedding: NodeTypeDefinition = { stale_strategy: { type: 'string', enum: ['column', 'null', 'hash'], - description: 'Strategy for tracking embedding staleness in extract mode', + description: + 'Strategy for tracking embedding staleness in extract mode. ' + + 'column: {field_name}_updated_at timestamptz. null: set embedding to NULL. ' + + 'hash: {field_name}_source_hash md5.', default: 'column' }, - include_stale_field: { + include_updated_at_field: { type: 'boolean', - description: 'Whether to include the embedding_stale boolean field', + description: + 'Whether to include the {field_name}_updated_at timestamptz column ' + + '(read-only in GraphQL, set by worker on completion).', default: true } } diff --git a/packages/node-type-registry/src/data/search-vector.ts b/packages/node-type-registry/src/data/search-vector.ts index abe1ffc52..84f70c765 100644 --- a/packages/node-type-registry/src/data/search-vector.ts +++ b/packages/node-type-registry/src/data/search-vector.ts @@ -44,9 +44,9 @@ export const SearchVector: NodeTypeDefinition = { description: 'Index-specific options. HNSW: {m, ef_construction}. IVFFlat: {lists}.', default: {} }, - include_stale_field: { + include_updated_at_field: { type: 'boolean', - description: 'When stale_strategy is column, adds an embedding_stale boolean field', + description: 'When stale_strategy is column, adds a per-field {field_name}_updated_at timestamptz column (read-only in GraphQL). NULL means needs computation; set by the worker on completion.', default: true }, source_fields: { @@ -74,7 +74,7 @@ export const SearchVector: NodeTypeDefinition = { 'null', 'hash' ], - description: 'Strategy for tracking embedding staleness. column: embedding_stale boolean. null: set embedding to NULL. hash: md5 hash of source fields.', + description: 'Strategy for tracking embedding staleness. column: {field_name}_updated_at timestamptz (NULL = needs computation). null: set embedding to NULL. hash: {field_name}_source_hash md5 of source fields.', default: 'column' }, chunks: { From 98e5cf439e4d7f610274f0db80789e5c7ac2c18c Mon Sep 17 00:00:00 2001 From: Dan Lynch Date: Thu, 14 May 2026 06:17:15 +0000 Subject: [PATCH 2/2] refactor: remove stale_strategy and include_updated_at_field from node type registry Simplifies SearchVector, DataFileEmbedding, and DataImageEmbedding: - Remove stale_strategy parameter (was column/null/hash) - Remove include_updated_at_field parameter - _updated_at timestamp is now always created by the SQL generator - Update description to reflect simplified behavior --- .../src/data/data-file-embedding.ts | 18 ------------------ .../src/data/data-image-embedding.ts | 18 ------------------ .../src/data/search-vector.ts | 17 +---------------- 3 files changed, 1 insertion(+), 52 deletions(-) diff --git a/packages/node-type-registry/src/data/data-file-embedding.ts b/packages/node-type-registry/src/data/data-file-embedding.ts index a95752014..c6c608e6b 100644 --- a/packages/node-type-registry/src/data/data-file-embedding.ts +++ b/packages/node-type-registry/src/data/data-file-embedding.ts @@ -177,24 +177,6 @@ export const DataFileEmbedding: NodeTypeDefinition = { default: 'generate_chunks' } } - }, - - // ── Stale tracking (meaningful in extract mode) ──────────────── - stale_strategy: { - type: 'string', - enum: ['column', 'null', 'hash'], - description: - 'Strategy for tracking embedding staleness when extraction is enabled. ' + - 'column: {field_name}_updated_at timestamptz (NULL = needs computation). ' + - 'null: set embedding to NULL. hash: {field_name}_source_hash md5.', - default: 'column' - }, - include_updated_at_field: { - type: 'boolean', - description: - 'Whether to include the {field_name}_updated_at timestamptz column ' + - '(read-only in GraphQL, set by worker on completion). Extract mode only.', - default: true } } }, diff --git a/packages/node-type-registry/src/data/data-image-embedding.ts b/packages/node-type-registry/src/data/data-image-embedding.ts index 3d6bc9a9f..e55eb2dbe 100644 --- a/packages/node-type-registry/src/data/data-image-embedding.ts +++ b/packages/node-type-registry/src/data/data-image-embedding.ts @@ -150,24 +150,6 @@ export const DataImageEmbedding: NodeTypeDefinition = { enqueue_chunking_job: { type: 'boolean', default: true }, chunking_task_name: { type: 'string', default: 'generate_chunks' } } - }, - - // ── Stale tracking (forwarded to DataFileEmbedding) ──────────── - stale_strategy: { - type: 'string', - enum: ['column', 'null', 'hash'], - description: - 'Strategy for tracking embedding staleness in extract mode. ' + - 'column: {field_name}_updated_at timestamptz. null: set embedding to NULL. ' + - 'hash: {field_name}_source_hash md5.', - default: 'column' - }, - include_updated_at_field: { - type: 'boolean', - description: - 'Whether to include the {field_name}_updated_at timestamptz column ' + - '(read-only in GraphQL, set by worker on completion).', - default: true } } }, diff --git a/packages/node-type-registry/src/data/search-vector.ts b/packages/node-type-registry/src/data/search-vector.ts index 84f70c765..2de0e9205 100644 --- a/packages/node-type-registry/src/data/search-vector.ts +++ b/packages/node-type-registry/src/data/search-vector.ts @@ -5,7 +5,7 @@ export const SearchVector: NodeTypeDefinition = { slug: 'search_vector', category: 'search', display_name: 'Vector Search', - description: 'Adds a vector embedding column with HNSW or IVFFlat index for similarity search. Supports configurable dimensions, distance metrics (cosine, l2, ip), stale tracking strategies (column, null, hash), and automatic job enqueue triggers for embedding generation.', + description: 'Adds a vector embedding column with HNSW or IVFFlat index for similarity search. Supports configurable dimensions, distance metrics (cosine, l2, ip), per-field {field_name}_updated_at timestamp tracking (read-only in GraphQL), and automatic job enqueue triggers for embedding generation.', parameter_schema: { type: 'object', properties: { @@ -44,11 +44,6 @@ export const SearchVector: NodeTypeDefinition = { description: 'Index-specific options. HNSW: {m, ef_construction}. IVFFlat: {lists}.', default: {} }, - include_updated_at_field: { - type: 'boolean', - description: 'When stale_strategy is column, adds a per-field {field_name}_updated_at timestamptz column (read-only in GraphQL). NULL means needs computation; set by the worker on completion.', - default: true - }, source_fields: { type: 'array', items: { @@ -67,16 +62,6 @@ export const SearchVector: NodeTypeDefinition = { description: 'Task identifier for the job queue', default: 'generate_embedding' }, - stale_strategy: { - type: 'string', - enum: [ - 'column', - 'null', - 'hash' - ], - description: 'Strategy for tracking embedding staleness. column: {field_name}_updated_at timestamptz (NULL = needs computation). null: set embedding to NULL. hash: {field_name}_source_hash md5 of source fields.', - default: 'column' - }, chunks: { type: 'object', description: 'Chunking configuration for long-text embedding. Creates an embedding_chunks record that drives automatic text splitting and per-chunk embedding. Omit to skip chunking.',