From aab3699e650e7976f02776c0cc7ecdd4fba83a14 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Mon, 16 Mar 2026 13:51:20 +0000 Subject: [PATCH 1/5] feat(oq): rename edge fields, fix security scheme type, add duplicates stage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking changes to edge annotation field names (alpha API): - key → edge (edge label: property name, index, pattern) - from → traversal (qualified path from seed) - bfsDepth → hops (BFS distance from seed) - New: schema (clean parent schema name), isRequired (property in required list) Fix security scheme type field returning blank — add `type` as primary field name (alias: schemeType) with extension field fallback. Add `duplicates` analysis stage for finding schemas sharing the same content hash without the group-by/members workaround. Update all documentation: fix stale components.schemas source references, old refs-out/refs-in syntax, and outdated select()/pick/sort_by() examples in DESIGN.md, query --help, query-reference, and READMEs. 
--- README.md | 4 +- cmd/openapi/commands/openapi/README.md | 4 +- cmd/openapi/commands/openapi/query.go | 13 +- .../commands/openapi/query_reference.go | 45 +++--- oq/DESIGN.md | 34 ++--- oq/README.md | 17 ++- oq/exec.go | 139 ++++++++++++------ oq/field.go | 51 ++++--- oq/format.go | 10 +- oq/oq.go | 13 +- oq/oq_test.go | 117 ++++++++++++--- oq/parse.go | 3 + 12 files changed, 301 insertions(+), 149 deletions(-) diff --git a/README.md b/README.md index 26bc02b..7872536 100644 --- a/README.md +++ b/README.md @@ -183,10 +183,10 @@ openapi swagger validate ./api.swagger.yaml openapi swagger upgrade ./api.swagger.yaml ./openapi.yaml # Query schema graph — find deeply nested components -openapi spec query 'schemas | select(is_component) | sort_by(depth; desc) | first(10) | pick name, depth' ./spec.yaml +openapi spec query 'schemas | where(isComponent) | sort-by(depth, desc) | take(10) | select name, depth' ./spec.yaml # Query schema graph — blast radius of a schema change -openapi spec query 'schemas | select(name == "Error") | blast-radius | length' ./spec.yaml +openapi spec query 'schemas | where(name == "Error") | blast-radius | length' ./spec.yaml ``` For detailed usage instructions for each command group, see the individual documentation linked above. diff --git a/cmd/openapi/commands/openapi/README.md b/cmd/openapi/commands/openapi/README.md index 7a9613d..39147f1 100644 --- a/cmd/openapi/commands/openapi/README.md +++ b/cmd/openapi/commands/openapi/README.md @@ -1218,8 +1218,8 @@ openapi spec query 'operations | where(not hasErrorResponse) | select name, meth # Blast radius — what breaks if I change a schema? 
openapi spec query 'schemas | where(name == "Error") | blast-radius | length' ./spec.yaml -# Duplicate inline schemas -openapi spec query 'schemas | where(not isComponent) | group-by(hash) | where(count > 1)' ./spec.yaml +# Duplicate inline schemas — find and inspect their shapes +openapi spec query 'schemas | where(isInline) | duplicates | select name, type, hash, propertyCount' ./spec.yaml # Navigate to a single schema by name openapi spec query 'schemas | where(name == "Pet") | explain' ./spec.yaml diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 89e434e..a3434aa 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -22,18 +22,15 @@ responses, content types, and headers.`, Example: `Queries are pipelines: source | stage | stage | ... Pipeline stages: - Source: schemas, operations, webhooks, servers, tags, - components.schemas, components.parameters, - components.responses, components.request-bodies, components.headers, - components.security-schemes + Source: schemas, operations, components, webhooks, servers, tags, security Navigation: parameters, responses, request-body, content-types, headers, callbacks, links, to-schema, operation, security - Traversal: refs-out, refs-out(*), refs-in, refs-in(*), + Traversal: refs, refs(*), refs(out), refs(out, *), refs(in), refs(in, *), properties, properties(*), members, items, additional-properties, pattern-properties, parent, to-operations, to-schemas, path(A, B), neighbors, neighbors(*), blast-radius - Analysis: orphans, leaves, cycles, clusters, cross-tag, shared-refs + Analysis: orphans, leaves, cycles, clusters, cross-tag, shared-refs, duplicates Filter: where(expr), select , sort-by(field, desc), take(N), last(N), sample(N), highest(N, field), lowest(N, field), unique, group-by(field), group-by(field, name_field), length @@ -50,8 +47,8 @@ Operators: ==, !=, >, <, >=, <=, and, or, not, // (or default), has(), openapi spec query 
'operations | security | group-by(schemeType, operation)' spec.yaml openapi spec query 'schemas | where(isComponent) | sort-by(depth, desc) | take(10) | select name, depth' spec.yaml openapi spec query 'schemas | where(properties contains "email") | select name' spec.yaml - openapi spec query 'operations | where(name == "createUser") | request-body | content-types | to-schema | refs-out(*) | where(isComponent) | to-yaml' spec.yaml - openapi spec query 'components.security-schemes | select name, type, scheme' spec.yaml + openapi spec query 'operations | where(name == "createUser") | request-body | content-types | to-schema | refs(out, *) | where(isComponent) | to-yaml' spec.yaml + openapi spec query 'components | where(kind == "security-scheme") | select name, type, scheme' spec.yaml cat spec.yaml | openapi spec query 'schemas | length' For the full query language reference, run: openapi spec query-reference`, diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go index e5e486c..4639238 100644 --- a/cmd/openapi/commands/openapi/query_reference.go +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -57,7 +57,7 @@ in the schema reference graph. 
refs(out, N) Outgoing, depth-limited to N hops properties Expand to property sub-schemas (flattens allOf; with edge annotations) properties(*) Recursive properties (follows $refs, flattens allOf, - expands oneOf/anyOf with qualified from paths) + expands oneOf/anyOf with qualified traversal paths) members Expand allOf/oneOf/anyOf children, or group rows into schemas items Expand to array items schema (checks allOf; with edge annotations) additional-properties Expand to additionalProperties schema @@ -94,6 +94,7 @@ ANALYSIS STAGES cross-tag Component schemas used by operations across multiple tags shared-refs Schemas shared by ALL operations in result set shared-refs(N) Schemas shared by at least N operations + duplicates Schemas sharing the same content hash (at least 2 copies) FILTER & TRANSFORM STAGES -------------------------- @@ -231,10 +232,12 @@ members, items, path). Use 'parent' to navigate back to the source schema. Field Type Description ───── ──── ─────────── via string Structural edge kind: property, items, allOf, oneOf, ref, ... - key string Structural edge label: property name, array index, etc. - from string Source schema name (the schema containing the relationship) + edge string Structural edge label: property name, array index, pattern, etc. + traversal string Qualified traversal path from seed (e.g. "User/allOf/BaseModel") + schema string Clean immediate parent schema name (last segment of traversal path) seed string Seed schema name (the schema that initiated the traversal) - bfsDepth int BFS depth from seed + hops int BFS distance from seed + isRequired bool Whether the property is in the parent schema's required array direction string → (outgoing) or ← (incoming) — set by bidi traversals (refs, path) PARAMETER FIELDS @@ -312,7 +315,7 @@ Available from components | where(kind == "security-scheme"). 
Field Type Description ───── ──── ─────────── name string Security scheme name (component key) - schemeType string Scheme type: apiKey, http, oauth2, openIdConnect, mutualTLS + type string Scheme type: apiKey, http, oauth2, openIdConnect, mutualTLS (alias: schemeType) in string API key location: header, query, cookie (apiKey only) scheme string HTTP auth scheme: bearer, basic, etc. (http only) bearerFormat string Bearer token format hint, e.g. JWT (http only) @@ -429,13 +432,13 @@ EXAMPLES Schema analysis: # Deeply nested component schemas - components.schemas | sort-by(depth, desc) | take(10) | select name, depth + schemas | where(isComponent) | sort-by(depth, desc) | take(10) | select name, depth # Most referenced schemas - components.schemas | sort-by(inDegree, desc) | take(10) | select name, inDegree + schemas | where(isComponent) | sort-by(inDegree, desc) | take(10) | select name, inDegree # Dead components — defined but never referenced - components.schemas | orphans | select name + schemas | where(isComponent) | orphans | select name # Circular references schemas | where(isCircular) | select name, location @@ -444,10 +447,10 @@ Schema analysis: schemas | where(name == "Error") | blast-radius | length # Component schemas within 3 hops - schemas | where(name == "User") | refs(out, *) | where(isComponent and bfsDepth <= 3) | select name, bfsDepth + schemas | where(name == "User") | refs(out, *) | where(isComponent and hops <= 3) | select name, hops # Edge annotations — how a schema references others - schemas | where(name == "Pet") | refs(out) | select name, via, key, from + schemas | where(name == "Pet") | refs(out) | select name, via, edge, traversal # All transitive dependencies (full closure) schemas | where(name == "Pet") | refs(out, *) | where(isComponent) | select name @@ -456,10 +459,10 @@ Schema analysis: schemas | where(properties contains "email") | select name # Schemas with properties matching a pattern (via traversal) - schemas | properties | 
where(key matches "(?i)date") | parent | unique | select name + schemas | properties | where(edge matches "(?i)date") | parent | unique | select name # Schemas with names starting with "Error" - components.schemas | where(name startswith "Error") | select name, type + schemas | where(isComponent and name startswith "Error") | select name, type Operations & navigation: @@ -490,7 +493,7 @@ Operations & navigation: Security: # List all security schemes - components.security-schemes | select name, type, scheme + components | where(kind == "security-scheme") | select name, type, scheme # Operations using OAuth2 operations | security | where(schemeType == "oauth2") | select schemeName, scopes, operation @@ -501,16 +504,22 @@ Security: Content auditing: # OneOf unions missing discriminator - components.schemas | where(unionWidth > 0 and not has(discriminator)) | select name, unionWidth + schemas | where(isComponent and unionWidth > 0 and not has(discriminator)) | select name, unionWidth # Schemas missing descriptions - components.schemas | where(not has(description)) | select name, type + schemas | where(isComponent and not has(description)) | select name, type # Operations missing error responses operations | where(not hasErrorResponse) | select name, method, path - # Duplicate inline schemas (same hash) - schemas | where(isInline) | group-by(hash) | where(count > 1) + # Duplicate inline schemas — find and inspect + schemas | where(isInline) | duplicates | select name, type, hash, propertyCount + + # Duplicate inline schemas — group by hash to see counts + schemas | where(isInline) | duplicates | group-by(hash) + + # Duplicate inline schemas — inspect shapes via properties + schemas | where(isInline) | duplicates | properties | select edge, name, type, traversal | unique Webhooks, servers, tags, callbacks & links: @@ -534,7 +543,7 @@ Webhooks, servers, tags, callbacks & links: schemas | where(has(patternProperties)) | pattern-properties # Schemas with default values - 
schemas | properties | where(has(default)) | select from, key, default + schemas | properties | where(has(default)) | select traversal, edge, default # Operations with extensions operations | where(has(x_speakeasy_name_override)) | select name, x_speakeasy_name_override diff --git a/oq/DESIGN.md b/oq/DESIGN.md index 338c388..7ed4f2e 100644 --- a/oq/DESIGN.md +++ b/oq/DESIGN.md @@ -19,7 +19,7 @@ constructs that users need to query. 1. **Two sources, many stages.** `operations` and `schemas` are the only pipeline entry points. Everything else is reached by navigation stages. 2. **Navigation over filtering.** `schemas.components` and `schemas.inline` are - removed. Use `select(isComponent)` / `select(isInline)` instead. + removed. Use `where(isComponent)` / `where(isInline)` instead. 3. **Context propagation.** Navigation stages propagate parent context as fields on child rows. A content-type row carries `statusCode` from its response and `op_idx` from its operation — no special lineage system needed. @@ -31,12 +31,12 @@ constructs that users need to query. | Change | Before | After | |--------|--------|-------| -| `schemas.components` | Component schemas | Removed — use `schemas \| select(isComponent)` | -| `schemas.inline` | Inline schemas | Removed — use `schemas \| select(isInline)` | +| `schemas.components` | Component schemas | Removed — use `schemas \| where(isComponent)` | +| `schemas.inline` | Inline schemas | Removed — use `schemas \| where(isInline)` | Note: `schemas` source continues to return **all** schemas (components + inline). This preserves the ability to query the full schema set. The sub-sources are -removed because they're just `select()` predicates. +removed because they're just `where()` predicates. 
## New Row Types @@ -121,8 +121,8 @@ operations ──┬── parameters ────── schema ├── request-body ── content-types ── schema └── schemas ──────── (existing graph traversal) -schemas ─────┬── refs-out, refs-in, reachable, ancestors - ├── properties, union-members, items +schemas ─────┬── refs(out), refs(in), reachable, ancestors + ├── properties, members, items ├── ops └── (all existing schema traversal stages) ``` @@ -133,7 +133,7 @@ Every non-schema row carries the index of the operation it originated from. - `operation` stage: from any row type with an OpIdx → yields the OperationResult - Works on: parameters, responses, request-body, content-types, headers -- Enables: `operations | responses | content-types | select(...) | operation | unique` +- Enables: `operations | responses | content-types | where(...) | operation | unique` ### Schema Resolution @@ -149,31 +149,31 @@ Every non-schema row carries the index of the operation it originated from. ```bash # Operations serving SSE responses -operations | responses | content-types | select(mediaType == "text/event-stream") | operation | unique +operations | responses | content-types | where(mediaType == "text/event-stream") | operation | unique # All content types used across the API -operations | responses | content-types | pick mediaType | unique | sort_by(mediaType) +operations | responses | content-types | select mediaType | unique | sort-by(mediaType) # Operations with deprecated parameters -operations | parameters | select(deprecated) | operation | unique +operations | parameters | where(deprecated) | operation | unique # Cookie parameters (potential security review) -operations | parameters | select(in == "cookie") | pick name, in, operation +operations | parameters | where(in == "cookie") | select name, in, operation # Responses without content bodies -operations | responses | select(not hasContent) | pick statusCode, description, operation +operations | responses | where(not hasContent) | select statusCode, 
description, operation # Schema for a specific content type -operations | select(name == "createUser") | request-body | content-types | select(mediaType == "application/json") | schema +operations | where(name == "createUser") | request-body | content-types | where(mediaType == "application/json") | to-schema # Headers on error responses -operations | responses | select(statusCode matches "^[45]") | headers | pick name, required +operations | responses | where(statusCode matches "^[45]") | headers | select name, required # Operations that accept multipart uploads -operations | request-body | content-types | select(mediaType matches "multipart/") | operation | unique +operations | request-body | content-types | where(mediaType matches "multipart/") | operation | unique # Content-types on 200 responses only -operations | responses | content-types | select(statusCode == "200") | pick mediaType, operation +operations | responses | content-types | where(statusCode == "200") | select mediaType, operation ``` ## Emit Attribution Fix @@ -270,7 +270,7 @@ public `SchemaByPtr(*oas3.JSONSchemaReferenceable) (NodeID, bool)` method to - `schemas.components` source - `schemas.inline` source -- Tests for removed sources (update to use `schemas | select(isComponent)` etc.) +- Tests for removed sources (update to use `schemas | where(isComponent)` etc.) ## What Does NOT Change diff --git a/oq/README.md b/oq/README.md index 9d7904c..95a0518 100644 --- a/oq/README.md +++ b/oq/README.md @@ -50,7 +50,7 @@ source | stage | stage | ... | terminal | `refs(out)` / `refs(out, *)` | Outgoing refs only: 1-hop or closure | | `refs(in)` / `refs(in, *)` | Incoming refs only: 1-hop or closure | | `refs(N)` / `refs(out, N)` | Depth-limited to N hops | -| `properties` / `properties(*)` | Property sub-schemas (allOf-flattening). 
`properties(*)` recursively expands through `$ref`, `oneOf`, `anyOf` with qualified `from` paths | +| `properties` / `properties(*)` | Property sub-schemas (allOf-flattening). `properties(*)` recursively expands through `$ref`, `oneOf`, `anyOf` with qualified `traversal` paths | | `members` | allOf/oneOf/anyOf children, or expand group rows into schemas | | `items` | Array items schema (with edge annotations) | | `additional-properties` | Expand to additionalProperties schema | @@ -88,6 +88,7 @@ Navigate into the internal structure of operations. These stages produce new row | `clusters` | Weakly connected component grouping | | `cross-tag` | Schemas used by operations across multiple tags | | `shared-refs` | Schemas shared by ALL operations in result set | +| `duplicates` | Schemas sharing the same content hash (at least 2 copies) | ### Filter & Transform Stages @@ -183,10 +184,12 @@ Available on rows produced by traversal stages (`refs`, `properties`, `members`, | Field | Type | Description | |-------|------|-------------| | `via` | string | Structural edge kind: property, items, allOf, oneOf, ref, ... | -| `key` | string | Structural edge label: property name, array index, etc. | -| `from` | string | Source schema name (the schema containing the relationship) | +| `edge` | string | Structural edge label: property name, array index, pattern, etc. | +| `traversal` | string | Qualified traversal path from seed (e.g. 
"User/allOf/BaseModel") | +| `schema` | string | Clean immediate parent schema name (last segment of traversal path) | | `seed` | string | Seed schema name (the schema that initiated the traversal) | -| `bfsDepth` | int | BFS depth from seed | +| `hops` | int | BFS distance from seed | +| `isRequired` | bool | Whether the property is in the parent schema's required array | | `direction` | string | `→` (outgoing) or `←` (incoming) — set by bidi traversals (`refs`, `path`) | ### Parameter Fields @@ -378,13 +381,13 @@ schemas | where(name matches "Error.*") | select name, path schemas | group-by(type) # Edge annotations — how does Pet reference other schemas? -schemas | where(isComponent) | where(name == "Pet") | refs(out) | select name, via, key, from +schemas | where(isComponent) | where(name == "Pet") | refs(out) | select name, via, edge, traversal # Blast radius — what breaks if Error changes? schemas | where(isComponent) | where(name == "Error") | blast-radius | length # 1-hop bidirectional refs (with direction arrows) -schemas | where(isComponent) | where(name == "Pet") | refs | select name, direction, via, key +schemas | where(isComponent) | where(name == "Pet") | refs | select name, direction, via, edge # Orphaned schemas schemas | where(isComponent) | orphans | select name @@ -466,7 +469,7 @@ schemas | where(has(additionalProperties)) | additional-properties schemas | where(has(patternProperties)) | pattern-properties # Schemas with default values -schemas | properties | where(has(default)) | select from, key, default +schemas | properties | where(has(default)) | select traversal, edge, default # Extension fields (use underscores for dashes in expressions) operations | where(has(x_speakeasy_name_override)) | select name, x_speakeasy_name_override diff --git a/oq/exec.go b/oq/exec.go index 28f939e..cd2bb12 100644 --- a/oq/exec.go +++ b/oq/exec.go @@ -219,6 +219,8 @@ func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro return 
execTagBoundary(result, g) case StageSharedRefs: return execSharedRefs(stage, result, g) + case StageDuplicates: + return execDuplicates(result, g) case StageOrigin: return execOrigin(result, g) case StageToYAML: @@ -337,7 +339,7 @@ func execUnique(result *Result, g *graph.SchemaGraph) (*Result, error) { for _, row := range result.Rows { var key string if len(result.Fields) > 0 { - // When pick is active, deduplicate by projected field values + // When select is active, deduplicate by projected field values var sb strings.Builder for i, f := range result.Fields { if i > 0 { @@ -420,8 +422,8 @@ func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Res for _, row := range result.Rows { seedName := schemaName(row.SchemaIdx, g) for _, newRow := range fn(row, g) { - if newRow.Target == "" { - newRow.Target = seedName + if newRow.Seed == "" { + newRow.Seed = seedName } key := edgeRowKey(newRow) if !seen[key] { @@ -435,10 +437,10 @@ func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Res func edgeRowKey(row Row) string { base := rowKey(row) - if row.Via == "" { + if row.EdgeKind == "" { return base } - return base + "|" + row.From + "|" + row.Via + "|" + row.Key + return base + "|" + row.Traversal + "|" + row.EdgeKind + "|" + row.EdgeLabel } // traverseOutEdges returns outgoing edge rows, optionally filtered by edge kind. 
@@ -456,9 +458,9 @@ func traverseOutEdges(row Row, g *graph.SchemaGraph, kinds ...graph.EdgeKind) [] result = append(result, Row{ Kind: SchemaResult, SchemaIdx: int(edge.To), - Via: edgeKindString(edge.Kind), - Key: edge.Label, - From: fromName, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + Traversal: fromName, }) } return result @@ -477,7 +479,7 @@ func edgeKindMatch(k graph.EdgeKind, kinds []graph.EdgeKind) bool { // It replaces refs-out, refs-in, and neighbors with a single BFS that: // - Supports direction modes: "out", "in", "" (bidi) // - Supports depth limits: 1-hop, N-hop, or unbounded (*) -// - Always preserves edge metadata (via, key, from, bfsDepth) +// - Always preserves edge metadata (via, edge, traversal, hops) // - In bidi mode, annotates each hop with Direction ("→" or "←") func execRefs(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { dir := stage.RefsDir @@ -501,9 +503,9 @@ func execRefs(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error type entry struct { id graph.NodeID depth int - via string - key string - from string + edgeKind string + edgeLabel string + traversal string direction string // "→" or "←" } @@ -520,11 +522,11 @@ func execRefs(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error r := Row{ Kind: SchemaResult, SchemaIdx: int(cur.id), - BFSDepth: cur.depth, - Target: seedName, - Via: cur.via, - Key: cur.key, - From: cur.from, + Hops: cur.depth, + Seed: seedName, + EdgeKind: cur.edgeKind, + EdgeLabel: cur.edgeLabel, + Traversal: cur.traversal, } if isBidi { r.Direction = cur.direction @@ -544,9 +546,9 @@ func execRefs(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error queue = append(queue, entry{ id: edge.To, depth: cur.depth + 1, - via: edgeKindString(edge.Kind), - key: edge.Label, - from: schemaName(int(cur.id), g), + edgeKind: edgeKindString(edge.Kind), + edgeLabel: edge.Label, + traversal: schemaName(int(cur.id), g), direction: "→", }) } @@ -569,9 
+571,9 @@ func execRefs(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error queue = append(queue, entry{ id: resolvedID, depth: cur.depth + 1, - via: via, - key: key, - from: schemaName(resolvedIdx, g), + edgeKind: via, + edgeLabel: key, + traversal: schemaName(resolvedIdx, g), direction: "←", }) } @@ -644,9 +646,9 @@ func collectPropertiesDirect(idx int, fromName string, g *graph.SchemaGraph, see *result = append(*result, Row{ Kind: SchemaResult, SchemaIdx: int(edge.To), - Via: edgeKindString(edge.Kind), - Key: edge.Label, - From: fromName, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + Traversal: fromName, }) } } @@ -712,7 +714,7 @@ func execPropertiesFixpoint(result *Result, g *graph.SchemaGraph) (*Result, erro for _, prop := range props { if !seen[prop.SchemaIdx] { seen[prop.SchemaIdx] = true - prop.BFSDepth = depth + prop.Hops = depth nextLevel = append(nextLevel, prop) } } @@ -739,9 +741,9 @@ func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { result = append(result, Row{ Kind: SchemaResult, SchemaIdx: target, - Via: edgeKindString(edge.Kind), - Key: edge.Label, - From: fromName, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + Traversal: fromName, }) } } @@ -774,9 +776,9 @@ func traverseItems(row Row, g *graph.SchemaGraph) []Row { result = append(result, Row{ Kind: SchemaResult, SchemaIdx: int(innerEdge.To), - Via: edgeKindString(innerEdge.Kind), - Key: innerEdge.Label, - From: fromName, + EdgeKind: edgeKindString(innerEdge.Kind), + EdgeLabel: innerEdge.Label, + Traversal: fromName, }) } // Property named "items" that is an array — follow its EdgeItems @@ -787,9 +789,9 @@ func traverseItems(row Row, g *graph.SchemaGraph) []Row { result = append(result, Row{ Kind: SchemaResult, SchemaIdx: resolveRefTarget(int(itemEdge.To), g), - Via: edgeKindString(itemEdge.Kind), - Key: "items", - From: fromName, + EdgeKind: edgeKindString(itemEdge.Kind), + EdgeLabel: "items", + Traversal: fromName, }) } } @@ 
-1189,6 +1191,42 @@ func execSharedRefs(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, return out, nil } +// execDuplicates filters schema rows to only those sharing a content hash with +// at least one other schema in the input set. This makes it easy to find and +// inspect duplicate inline schemas without the group-by/members dance. +func execDuplicates(result *Result, g *graph.SchemaGraph) (*Result, error) { + // Count occurrences of each hash + hashCounts := make(map[string]int) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { + continue + } + h := g.Schemas[row.SchemaIdx].Hash + if h != "" { + hashCounts[h]++ + } + } + + // Keep only rows whose hash appears more than once + out := deriveResult(result) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { + continue + } + h := g.Schemas[row.SchemaIdx].Hash + if h != "" && hashCounts[h] > 1 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + // --- Edge annotation helpers --- func schemaName(idx int, g *graph.SchemaGraph) string { @@ -1338,6 +1376,8 @@ func describeStage(stage Stage) string { return "Analyze: schemas shared by at least " + strconv.Itoa(stage.Limit) + " operations" } return "Analyze: schemas shared by all operations in result" + case StageDuplicates: + return "Filter: schemas sharing the same content hash (duplicates)" case StageOrigin: return "Traverse: structural parent schema (walk up JSON pointer)" case StageToYAML: @@ -1425,10 +1465,11 @@ func execFields(result *Result) (*Result, error) { {"opCount", "int"}, {"tagCount", "int"}, {"via", "string"}, - {"key", "string"}, - {"from", "string"}, + {"edge", "string"}, + {"traversal", "string"}, + {"schema", "string"}, {"seed", "string"}, - {"bfsDepth", "int"}, + {"hops", "int"}, {"direction", "string"}, {"isRequired", "bool"}, // Schema 
content @@ -1481,8 +1522,8 @@ func execFields(result *Result) (*Result, error) { {"callbackName", "string"}, {"callbackCount", "int"}, {"via", "string"}, - {"key", "string"}, - {"from", "string"}, + {"edge", "string"}, + {"traversal", "string"}, } case ParameterResult: fields = []struct{ name, typ string }{ @@ -1538,7 +1579,7 @@ func execFields(result *Result) (*Result, error) { case SecuritySchemeResult: fields = []struct{ name, typ string }{ {"name", "string"}, - {"schemeType", "string"}, + {"type", "string"}, {"in", "string"}, {"scheme", "string"}, {"bearerFormat", "string"}, @@ -1593,9 +1634,9 @@ func execFields(result *Result) (*Result, error) { // --- Origin --- // execOrigin navigates back to the source schema of 1-hop edge annotations. -// After properties, union-members, items, refs-out, or refs-in, each row has -// a From field naming the source node. This stage looks up those source schemas -// by name, replacing the result set with the origin schemas. +// After properties, members, items, or refs traversals, each row has +// a Traversal field naming the source node. This stage looks up those source +// schemas by name, replacing the result set with the origin schemas. 
func execOrigin(result *Result, g *graph.SchemaGraph) (*Result, error) { out := deriveResult(result) seen := make(map[int]bool) @@ -1656,11 +1697,11 @@ func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { for i, hop := range hops { row := Row{Kind: SchemaResult, SchemaIdx: int(hop.Node)} if i > 0 { - row.From = schemaName(int(hops[i-1].Node), g) - row.BFSDepth = i + row.Traversal = schemaName(int(hops[i-1].Node), g) + row.Hops = i if hop.Edge != nil { - row.Via = edgeKindString(hop.Edge.Kind) - row.Key = hop.Edge.Label + row.EdgeKind = edgeKindString(hop.Edge.Kind) + row.EdgeLabel = hop.Edge.Label } if hop.Forward { row.Direction = "→" diff --git a/oq/field.go b/oq/field.go index 2a5b0e6..cd8d8ec 100644 --- a/oq/field.go +++ b/oq/field.go @@ -115,22 +115,25 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { case "tagCount": return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) case "via": - return expr.StringVal(row.Via) - case "key": - return expr.StringVal(row.Key) - case "from": - return expr.StringVal(row.From) + return expr.StringVal(row.EdgeKind) + case "edge": + return expr.StringVal(row.EdgeLabel) + case "traversal": + return expr.StringVal(row.Traversal) + case "schema": + // Clean immediate parent schema name — last segment of traversal path + return expr.StringVal(traversalSchema(row.Traversal)) case "seed": - return expr.StringVal(row.Target) - case "bfsDepth": - return expr.IntVal(row.BFSDepth) + return expr.StringVal(row.Seed) + case "hops": + return expr.IntVal(row.Hops) case "direction": return expr.StringVal(row.Direction) case "isRequired": - // Check if this property's key appears in the parent schema's required array. + // Check if this property's edge label appears in the parent schema's required array. // Only meaningful on property edge rows (from traversal stages). 
- if row.Via == "property" && row.Key != "" { - return expr.BoolVal(isPropertyRequired(row.Key, row.From, g)) + if row.EdgeKind == "property" && row.EdgeLabel != "" { + return expr.BoolVal(isPropertyRequired(row.EdgeLabel, row.Traversal, g)) } return expr.BoolVal(false) case "properties": @@ -192,11 +195,11 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { } return expr.IntVal(0) case "via": - return expr.StringVal(row.Via) - case "key": - return expr.StringVal(row.Key) - case "from": - return expr.StringVal(row.From) + return expr.StringVal(row.EdgeKind) + case "edge": + return expr.StringVal(row.EdgeLabel) + case "traversal": + return expr.StringVal(row.Traversal) default: return operationContentField(o, name) } @@ -327,7 +330,7 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { switch name { case "name": return expr.StringVal(row.SchemeName) - case "schemeType": + case "type", "schemeType": return expr.StringVal(string(ss.GetType())) case "in": return expr.StringVal(string(ss.GetIn())) @@ -342,6 +345,7 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { case "deprecated": return expr.BoolVal(ss.Deprecated != nil && *ss.Deprecated) } + return extensionFieldValue(ss.Extensions, name) case SecurityRequirementResult: switch name { case "schemeName": @@ -793,6 +797,19 @@ var schemaPresenceProbes = map[string]func(*oas3.Schema) bool{ "unevaluatedProperties": func(s *oas3.Schema) bool { return s.UnevaluatedProperties != nil }, } +// traversalSchema extracts the clean immediate parent schema name from a traversal path. +// For simple paths like "User", returns "User". +// For qualified paths like "User/allOf/BaseModel", returns "BaseModel" (last segment). 
+func traversalSchema(traversal string) string { + if traversal == "" { + return "" + } + if idx := strings.LastIndex(traversal, "/"); idx >= 0 { + return traversal[idx+1:] + } + return traversal +} + // isPropertyRequired checks if a property key is in the parent schema's required array. func isPropertyRequired(key, fromName string, g *graph.SchemaGraph) bool { // For qualified from paths like "Event/oneOf/EntityCreatedEvent", diff --git a/oq/format.go b/oq/format.go index 8e96f62..a3d24ac 100644 --- a/oq/format.go +++ b/oq/format.go @@ -458,16 +458,16 @@ func resolveDefaultFields(rows []Row) []string { // traversal-oriented defaults instead of overview defaults. if firstKind == SchemaResult && hasEdgeAnnotations(rows) { if hasBidiAnnotations(rows) { - return []string{"name", "direction", "via", "key", "bfsDepth"} + return []string{"name", "direction", "via", "edge", "hops"} } - return []string{"from", "key", "type", "bfsDepth"} + return []string{"schema", "edge", "type", "hops"} } return defaultFieldsForKind(firstKind) } // expandStarFields replaces "*" in a field list with the context-aware defaults. -// e.g., ["*", "bfsDepth"] → ["name", "type", "depth", "inDegree", "outDegree", "bfsDepth"] +// e.g., ["*", "hops"] → ["name", "type", "depth", "inDegree", "outDegree", "hops"] func expandStarFields(fields []string, rows []Row) []string { hasStar := false for _, f := range fields { @@ -495,7 +495,7 @@ func expandStarFields(fields []string, rows []Row) []string { // hasEdgeAnnotations returns true if any row has populated edge fields. 
func hasEdgeAnnotations(rows []Row) bool { for _, row := range rows { - if row.Via != "" || row.From != "" { + if row.EdgeKind != "" || row.Traversal != "" { return true } } @@ -531,7 +531,7 @@ func defaultFieldsForKind(kind ResultKind) []string { case HeaderResult: return []string{"name", "required", "statusCode", "operation"} case SecuritySchemeResult: - return []string{"name", "schemeType", "in", "scheme"} + return []string{"name", "type", "in", "scheme"} case SecurityRequirementResult: return []string{"schemeName", "schemeType", "scopes", "operation"} case ServerResult: diff --git a/oq/oq.go b/oq/oq.go index b2db21a..307f5cc 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -38,14 +38,14 @@ type Row struct { OpIdx int // index into SchemaGraph.Operations // Edge annotations (populated by traversal stages) - Via string // edge type: "property", "items", "allOf", "oneOf", "ref", etc. - Key string // edge key: property name, array index, etc. - From string // source node name (the node that contains the reference) - Target string // target/seed node name (the node traversal originated from) + EdgeKind string // edge type: "property", "items", "allOf", "oneOf", "ref", etc. + EdgeLabel string // edge label: property name, array index, etc. + Traversal string // qualified traversal path from seed (e.g. 
"User/allOf/BaseModel") + Seed string // seed schema name (the schema that initiated the traversal) Direction string // "→" (outgoing) or "←" (incoming) — set by bidi traversals - // BFS depth (populated by depth-limited traversals) - BFSDepth int + // Traversal depth (populated by depth-limited traversals) + Hops int // Group annotations (populated by group-by stages) GroupKey string // group key value @@ -185,6 +185,7 @@ const ( StageLinks // response links StageAdditionalProperties // schema additional properties traversal StagePatternProperties // schema pattern properties traversal + StageDuplicates // schemas sharing the same content hash ) // Stage represents a single stage in the query pipeline. diff --git a/oq/oq_test.go b/oq/oq_test.go index f27b98c..85d0543 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -402,20 +402,20 @@ func TestExecute_Path_EdgeAnnotations_Success(t *testing.T) { g := loadTestGraph(t) // Path from Pet to Address should have edge annotations on intermediate/final nodes - result, err := oq.Execute(`schemas | path(Pet, Address) | select name, via, key, from, bfsDepth`, g) + result, err := oq.Execute(`schemas | path(Pet, Address) | select name, via, edge, traversal, hops`, g) require.NoError(t, err) require.GreaterOrEqual(t, len(result.Rows), 3, "path should have at least 3 nodes") // First node (Pet) has no edge annotation first := result.Rows[0] assert.Empty(t, oq.FieldValuePublic(first, "via", g).Str, "first node should have no via") - assert.Equal(t, 0, oq.FieldValuePublic(first, "bfsDepth", g).Int, "first node should have bfsDepth 0") + assert.Equal(t, 0, oq.FieldValuePublic(first, "hops", g).Int, "first node should have hops 0") // Second node should have edge annotations from Pet second := result.Rows[1] assert.NotEmpty(t, oq.FieldValuePublic(second, "via", g).Str, "second node should have via") - assert.Equal(t, "Pet", oq.FieldValuePublic(second, "from", g).Str, "second node should come from Pet") - assert.Equal(t, 1, 
oq.FieldValuePublic(second, "bfsDepth", g).Int, "second node should have bfsDepth 1") + assert.Equal(t, "Pet", oq.FieldValuePublic(second, "traversal", g).Str, "second node should come from Pet") + assert.Equal(t, 1, oq.FieldValuePublic(second, "hops", g).Int, "second node should have hops 1") // Last node (Address) should have edge annotations last := result.Rows[len(result.Rows)-1] @@ -557,7 +557,7 @@ func TestExecute_EdgeAnnotations_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) - result, err := oq.Execute(`schemas | where(isComponent) | where(name == "Pet") | refs(out) | select name, via, key, from`, g) + result, err := oq.Execute(`schemas | where(isComponent) | where(name == "Pet") | refs(out) | select name, via, edge, traversal`, g) require.NoError(t, err) assert.NotEmpty(t, result.Rows, "references from Pet should have results") @@ -565,8 +565,8 @@ func TestExecute_EdgeAnnotations_Success(t *testing.T) { for _, row := range result.Rows { kind := oq.FieldValuePublic(row, "via", g) assert.NotEmpty(t, kind.Str, "via should be set") - from := oq.FieldValuePublic(row, "from", g) - assert.Equal(t, "Pet", from.Str, "from should be Pet") + from := oq.FieldValuePublic(row, "traversal", g) + assert.Equal(t, "Pet", from.Str, "traversal should be Pet") } } @@ -1040,7 +1040,7 @@ func TestExecute_FieldValue_EdgeCases(t *testing.T) { assert.NotEmpty(t, result.Rows, "should have operation rows") // Test edge fields on non-traversal rows (should be empty strings) - result, err = oq.Execute("schemas | where(isComponent) | take(1) | select name, via, key, from", g) + result, err = oq.Execute("schemas | where(isComponent) | take(1) | select name, via, edge, traversal", g) require.NoError(t, err) assert.NotEmpty(t, result.Rows, "should have schema rows") viaVal := oq.FieldValuePublic(result.Rows[0], "via", g) @@ -1327,7 +1327,7 @@ func TestExecute_CyclicSpec_EdgeAnnotations(t *testing.T) { g := loadCyclicGraph(t) // Test references to cover edgeKindString branches - 
result, err := oq.Execute(`schemas | where(isComponent) | where(name == "NodeA") | refs(out) | select name, via, key`, g) + result, err := oq.Execute(`schemas | where(isComponent) | where(name == "NodeA") | refs(out) | select name, via, edge`, g) require.NoError(t, err) assert.NotEmpty(t, result.Rows, "NodeA should have outgoing refs") @@ -2437,15 +2437,15 @@ func TestExecute_ReferencedByResolvesWrappers_Success(t *testing.T) { // Via should be the structural edge kind (property, items, etc.), not 'ref' // Key should be the structural label (e.g. 'owner'), not the $ref URI for _, row := range result.Rows { - from := oq.FieldValuePublic(row, "from", g) + from := oq.FieldValuePublic(row, "traversal", g) name := oq.FieldValuePublic(row, "name", g) - assert.Equal(t, name.Str, from.Str, "from should match the resolved node name") + assert.Equal(t, name.Str, from.Str, "traversal should match the resolved node name") via := oq.FieldValuePublic(row, "via", g) assert.NotEqual(t, "ref", via.Str, "via should be structural (property/items/allOf), not 'ref'") - key := oq.FieldValuePublic(row, "key", g) - assert.NotContains(t, key.Str, "#/components", "key should be structural label, not $ref URI") + key := oq.FieldValuePublic(row, "edge", g) + assert.NotContains(t, key.Str, "#/components", "edge should be structural label, not $ref URI") } } @@ -2794,7 +2794,7 @@ func TestExecute_FieldsIntrospection_Success(t *testing.T) { expect string }{ {"operations | fields", "method"}, - {"schemas | where(isComponent) | fields", "bfsDepth"}, + {"schemas | where(isComponent) | fields", "hops"}, {"schemas | where(isComponent) | fields", "seed"}, {"operations | where(operationId == \"listPets\") | parameters | fields", "in"}, {"operations | where(operationId == \"listPets\") | responses | fields", "statusCode"}, @@ -3436,7 +3436,7 @@ func TestEdgeKindString_CyclicSpec(t *testing.T) { g := loadCyclicGraph(t) // Get refs(out) from ALL schemas (including inline) to capture allOf, anyOf, items 
edges - result, err := oq.Execute(`schemas | refs(out) | select name, via, key`, g) + result, err := oq.Execute(`schemas | refs(out) | select name, via, edge`, g) require.NoError(t, err) assert.NotEmpty(t, result.Rows) @@ -3997,7 +3997,7 @@ func TestPropertiesStar_Recursive(t *testing.T) { g := loadCyclicGraph(t) // properties(*) recursively collects properties through allOf, oneOf, anyOf - result, err := oq.Execute(`schemas | where(isComponent) | where(name == "NodeA") | properties(*) | select name, via, from`, g) + result, err := oq.Execute(`schemas | where(isComponent) | where(name == "NodeA") | properties(*) | select name, via, traversal`, g) require.NoError(t, err) assert.NotEmpty(t, result.Rows) } @@ -4412,7 +4412,7 @@ func TestExecute_SchemaDefaultField(t *testing.T) { g := loadTestGraph(t) // Pet.status has default: available - result, err := oq.Execute(`schemas | where(name == "Pet") | properties | where(key == "status") | select key, default`, g) + result, err := oq.Execute(`schemas | where(name == "Pet") | properties | where(edge == "status") | select edge, default`, g) require.NoError(t, err) require.NotEmpty(t, result.Rows) def := oq.FieldValuePublic(result.Rows[0], "default", g) @@ -4424,7 +4424,7 @@ func TestExecute_SchemaEnumField(t *testing.T) { g := loadTestGraph(t) // Pet.status has enum values - result, err := oq.Execute(`schemas | where(name == "Pet") | properties | where(key == "status") | select key, enum, enumCount`, g) + result, err := oq.Execute(`schemas | where(name == "Pet") | properties | where(edge == "status") | select edge, enum, enumCount`, g) require.NoError(t, err) require.NotEmpty(t, result.Rows) enumCount := oq.FieldValuePublic(result.Rows[0], "enumCount", g) @@ -4642,3 +4642,84 @@ func TestExecute_ComponentDefaultField(t *testing.T) { assert.Equal(t, "20", def.Str) } } + +func TestExecute_SecuritySchemeTypeAlias(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`components | where(kind == 
"security-scheme")`, g) + require.NoError(t, err) + require.NotEmpty(t, result.Rows) + + row := result.Rows[0] + + // "type" is the primary field name ("schemeType" is kept as an alias) + typ := oq.FieldValuePublic(row, "type", g) + assert.Equal(t, "http", typ.Str) + + // "schemeType" should still work + schemeType := oq.FieldValuePublic(row, "schemeType", g) + assert.Equal(t, "http", schemeType.Str) + + // Both should return the same value + assert.Equal(t, typ.Str, schemeType.Str) +} + +func TestExecute_Duplicates_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Get all inline schemas + allInline, err := oq.Execute(`schemas | where(isInline)`, g) + require.NoError(t, err) + + // Get duplicates + result, err := oq.Execute(`schemas | where(isInline) | duplicates`, g) + require.NoError(t, err) + + // Duplicates should be a subset of inline schemas + assert.LessOrEqual(t, len(result.Rows), len(allInline.Rows)) + + // Every row in duplicates should share its hash with at least one other row + hashCounts := make(map[string]int) + for _, row := range result.Rows { + h := oq.FieldValuePublic(row, "hash", g) + if h.Str != "" { + hashCounts[h.Str]++ + } + } + for hash, count := range hashCounts { + assert.Greater(t, count, 1, "hash %s should appear more than once in duplicates result", hash) + } +} + +func TestExecute_Duplicates_AllSchemas(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // duplicates works on any schema set, not just inline + result, err := oq.Execute(`schemas | duplicates`, g) + require.NoError(t, err) + + // Verify all returned rows are schema rows + for _, row := range result.Rows { + assert.Equal(t, oq.SchemaResult, row.Kind) + } +} + +func TestExecute_Duplicates_Composable(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // duplicates should be composable with downstream stages + _, err := oq.Execute(`schemas | where(isInline) | duplicates | select name, type, hash`, g) + require.NoError(t, err) + + // Should work with properties
traversal + _, err = oq.Execute(`schemas | where(isInline) | duplicates | properties | select edge, name`, g) + require.NoError(t, err) + + // Should work with group-by on the result + _, err = oq.Execute(`schemas | where(isInline) | duplicates | group-by(hash)`, g) + require.NoError(t, err) +} diff --git a/oq/parse.go b/oq/parse.go index 1d9d225..66a29bb 100644 --- a/oq/parse.go +++ b/oq/parse.go @@ -401,6 +401,9 @@ func parseStage(s string) (Stage, error) { case "pattern-properties": return Stage{Kind: StagePatternProperties}, nil + case "duplicates": + return Stage{Kind: StageDuplicates}, nil + default: return Stage{}, fmt.Errorf("unknown stage: %q", keyword) } From 7f3ea4ecb1844fa887173aed1a17cc02eb945f9e Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Mon, 16 Mar 2026 13:52:08 +0000 Subject: [PATCH 2/5] chore(oq): remove outdated DESIGN.md --- oq/DESIGN.md | 292 --------------------------------------------------- 1 file changed, 292 deletions(-) delete mode 100644 oq/DESIGN.md diff --git a/oq/DESIGN.md b/oq/DESIGN.md deleted file mode 100644 index 7ed4f2e..0000000 --- a/oq/DESIGN.md +++ /dev/null @@ -1,292 +0,0 @@ -# oq Navigation Model Overhaul - -## Motivation - -The current query language can only answer questions about schemas and operations. -It cannot answer questions like: - -- Which operations serve `text/event-stream` responses? -- Which parameters are deprecated across the API? -- What content types does a specific endpoint accept? -- Which responses have no content body? - -The graph builds operation→schema edges but **discards** content-type, parameter, -response, and header information in the process. These are first-class OpenAPI -constructs that users need to query. - -## Design Principles - -1. **Two sources, many stages.** `operations` and `schemas` are the only pipeline - entry points. Everything else is reached by navigation stages. -2. **Navigation over filtering.** `schemas.components` and `schemas.inline` are - removed. 
Use `where(isComponent)` / `where(isInline)` instead. -3. **Context propagation.** Navigation stages propagate parent context as fields - on child rows. A content-type row carries `statusCode` from its response and - `op_idx` from its operation — no special lineage system needed. -4. **Position disambiguates.** `schemas` as the first token = source (all schemas). - `operations | schemas` = navigation stage. Same word, different meaning based - on position — same as current behavior. - -## Breaking Changes - -| Change | Before | After | -|--------|--------|-------| -| `schemas.components` | Component schemas | Removed — use `schemas \| where(isComponent)` | -| `schemas.inline` | Inline schemas | Removed — use `schemas \| where(isInline)` | - -Note: `schemas` source continues to return **all** schemas (components + inline). -This preserves the ability to query the full schema set. The sub-sources are -removed because they're just `where()` predicates. - -## New Row Types - -### ParameterRow - -Produced by: `operations | parameters` - -| Field | Type | Description | -|-------|------|-------------| -| `name` | string | Parameter name | -| `in` | string | Location: path, query, header, cookie | -| `required` | bool | Whether required | -| `deprecated` | bool | Whether deprecated | -| `description` | string | Parameter description | -| `style` | string | Serialization style | -| `explode` | bool | Whether to explode | -| `hasSchema` | bool | Whether a schema is defined | -| `allowEmptyValue` | bool | Whether empty values are allowed | -| `allowReserved` | bool | Whether reserved chars are allowed | -| `operation` | string | Source operation name (inherited) | - -### ResponseRow - -Produced by: `operations | responses` - -| Field | Type | Description | -|-------|------|-------------| -| `statusCode` | string | "200", "404", "default", etc. 
| -| `description` | string | Response description | -| `contentTypeCount` | int | Number of media types | -| `headerCount` | int | Number of headers | -| `linkCount` | int | Number of links | -| `hasContent` | bool | Whether content is defined | -| `operation` | string | Source operation name (inherited) | - -### RequestBodyRow - -Produced by: `operations | request-body` - -| Field | Type | Description | -|-------|------|-------------| -| `description` | string | Request body description | -| `required` | bool | Whether required | -| `contentTypeCount` | int | Number of media types | -| `operation` | string | Source operation name (inherited) | - -### ContentTypeRow - -Produced by: `responses | content-types` or `request-body | content-types` - -| Field | Type | Description | -|-------|------|-------------| -| `mediaType` | string | "application/json", "text/event-stream", etc. | -| `hasSchema` | bool | Whether a schema is defined | -| `hasEncoding` | bool | Whether encoding is defined | -| `hasExample` | bool | Whether examples are defined | -| `statusCode` | string | Response status code (propagated from parent response, empty for request body) | -| `operation` | string | Source operation name (inherited) | - -### HeaderRow - -Produced by: `responses | headers` - -| Field | Type | Description | -|-------|------|-------------| -| `name` | string | Header name | -| `description` | string | Header description | -| `required` | bool | Whether required | -| `deprecated` | bool | Whether deprecated | -| `hasSchema` | bool | Whether a schema is defined | -| `statusCode` | string | Response status code (propagated from parent response) | -| `operation` | string | Source operation name (inherited) | - -## Navigation Map - -Shows which stages are valid from each row type: - -``` -operations ──┬── parameters ────── schema - ├── responses ──┬── content-types ── schema - │ └── headers ──────── schema - ├── request-body ── content-types ── schema - └── schemas ──────── 
(existing graph traversal) - -schemas ─────┬── refs(out), refs(in), reachable, ancestors - ├── properties, members, items - ├── ops - └── (all existing schema traversal stages) -``` - -### Back-navigation - -Every non-schema row carries the index of the operation it originated from. - -- `operation` stage: from any row type with an OpIdx → yields the OperationResult -- Works on: parameters, responses, request-body, content-types, headers -- Enables: `operations | responses | content-types | where(...) | operation | unique` - -### Schema Resolution - -- `schema` stage (singular): extracts the schema from a parameter, content-type, - or header row. Yields a SchemaResult row. If the schema exists in the graph - (via pointer lookup), uses the graph node. Otherwise yields nothing. -- This bridges the navigational model back into the existing graph traversal - system — once you have a schema row, all existing stages work. -- `schema` is distinct from `schemas` — singular extracts one schema from a - navigational row, plural navigates operations→all reachable schemas. 
- -## Example Queries (New Capabilities) - -```bash -# Operations serving SSE responses -operations | responses | content-types | where(mediaType == "text/event-stream") | operation | unique - -# All content types used across the API -operations | responses | content-types | select mediaType | unique | sort-by(mediaType) - -# Operations with deprecated parameters -operations | parameters | where(deprecated) | operation | unique - -# Cookie parameters (potential security review) -operations | parameters | where(in == "cookie") | select name, in, operation - -# Responses without content bodies -operations | responses | where(not hasContent) | select statusCode, description, operation - -# Schema for a specific content type -operations | where(name == "createUser") | request-body | content-types | where(mediaType == "application/json") | to-schema - -# Headers on error responses -operations | responses | where(statusCode matches "^[45]") | headers | select name, required - -# Operations that accept multipart uploads -operations | request-body | content-types | where(mediaType matches "multipart/") | operation | unique - -# Content-types on 200 responses only -operations | responses | content-types | where(statusCode == "200") | select mediaType, operation -``` - -## Emit Attribution Fix - -Separate from the navigation overhaul: `emit` should use `path` instead of `name` -as the YAML wrapper key. 
This gives full JSON pointer attribution: - -```yaml -# Before -/properties/vault_url: - type: string - format: uri - -# After -#/components/schemas/VaultConfig/properties/vault_url: - type: string - format: uri -``` - -## Implementation Strategy - -### Row Struct Extension - -Add typed fields for navigation context (matching existing pattern of typed fields): - -```go -type Row struct { - Kind ResultKind - SchemaIdx int - OpIdx int - - // Edge annotations (existing) - Via, Key, From string - - // Group annotations (existing) - GroupKey string - GroupCount int - GroupNames []string - - // Navigation objects (new) — one is set based on Kind - Parameter *openapi.Parameter - Response *openapi.Response - RequestBody *openapi.RequestBody - MediaType *openapi.MediaType - Header *openapi.Header - - // Propagated context (new) - StatusCode string // propagated from response to content-types/headers - MediaTypeName string // the media type key (e.g., "application/json") - HeaderName string // the header name - ParamName string // parameter name - SourceOpIdx int // operation this row originated from (-1 if N/A) -} -``` - -### No Graph Changes Required - -The new navigation stages work directly with the `Operation` object stored on -`OperationNode`. No changes to the graph package are needed. - -The `schema` stage needs to resolve schema pointers back to graph nodes. Add a -public `SchemaByPtr(*oas3.JSONSchemaReferenceable) (NodeID, bool)` method to -`SchemaGraph`. 
- -### Phases - -**Phase 1: Foundation** -- Add new ResultKind constants -- Extend Row struct with typed navigation fields -- Add `fieldValue` cases for new row types -- Add `defaultFieldsForKind` for new row types -- Remove `schemas.components` and `schemas.inline` sources -- Fix `emit` to use `path` as YAML key - -**Phase 2: Operation Navigation** -- `parameters` stage -- `responses` stage -- `request-body` stage -- `operation` back-navigation stage -- `SchemaByPtr` method on graph - -**Phase 3: Deep Navigation** -- `content-types` stage (from responses and request-body) -- `headers` stage (from responses) -- `schema` (singular) stage (from parameters, content-types, headers) - -**Phase 4: Cleanup** -- Update query reference docs -- Update oq/README.md -- Update CLI README -- Remove dead code from old schemas.components/schemas.inline paths - -## What Gets Removed - -- `schemas.components` source -- `schemas.inline` source -- Tests for removed sources (update to use `schemas | where(isComponent)` etc.) - -## What Does NOT Change - -- `schemas` source (still returns all schemas) -- All existing schema traversal stages -- All existing expression language features -- All existing filter/transform stages -- The module/def system -- Output formats (table, json, markdown, toon) -- The graph package (except adding SchemaByPtr) - -## Deferred (Future Work) - -- **Reverse navigation** (`usages` stage): schema → content-types/parameters that - reference it. Requires graph changes to store usage context during construction. -- **Webhooks source**: OAS 3.1 webhooks as a third entry point or merged into - operations with an `isWebhook` field. -- **Security schemes**: queryable security requirements on operations. -- **Links**: response link objects as a navigable construct. 
From eb4cf0510ca7344732eb02f87b124267e21e778d Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Mon, 16 Mar 2026 14:05:27 +0000 Subject: [PATCH 3/5] feat(oq): add `oq` as alias for `query` subcommand --- cmd/openapi/commands/openapi/query.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index a3434aa..c332e71 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -14,8 +14,9 @@ import ( ) var queryCmd = &cobra.Command{ - Use: "query [input-file]", - Short: "Query an OpenAPI specification using the oq pipeline language", + Use: "query [input-file]", + Aliases: []string{"oq"}, + Short: "Query an OpenAPI specification using the oq pipeline language", Long: `Query an OpenAPI specification using the oq pipeline language to answer structural and semantic questions about schemas, operations, parameters, responses, content types, and headers.`, From e465ceddbe12306dd2043c0fc5e47d55a9371af9 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Mon, 16 Mar 2026 14:07:27 +0000 Subject: [PATCH 4/5] feat(oq): add top-level `openapi oq` shortcut for query command --- cmd/openapi/commands/openapi/root.go | 3 +++ cmd/openapi/main.go | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go index 72562b0..b4e8964 100644 --- a/cmd/openapi/commands/openapi/root.go +++ b/cmd/openapi/commands/openapi/root.go @@ -2,6 +2,9 @@ package openapi import "github.com/spf13/cobra" +// OqCmd returns the oq query command for use as a top-level shortcut. 
+func OqCmd() *cobra.Command { return queryCmd } + // Apply adds OpenAPI commands to the provided root command func Apply(rootCmd *cobra.Command) { rootCmd.AddCommand(validateCmd) diff --git a/cmd/openapi/main.go b/cmd/openapi/main.go index 7e87529..98b8dc8 100644 --- a/cmd/openapi/main.go +++ b/cmd/openapi/main.go @@ -163,6 +163,12 @@ func init() { rootCmd.AddCommand(arazzoCmds) rootCmd.AddCommand(overlayCmds) + // Top-level shortcut: openapi oq → openapi spec query + oqShortcut := *openapiCmd.OqCmd() + oqShortcut.Use = "oq [input-file]" + oqShortcut.Aliases = nil // no aliases at top level + rootCmd.AddCommand(&oqShortcut) + // Global flags rootCmd.PersistentFlags().BoolP("verbose", "v", false, "verbose output") } From 2030da0c4715ba57a16efeb610d3b943010ab811 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Mon, 16 Mar 2026 14:14:58 +0000 Subject: [PATCH 5/5] fix(oq): remove redundant `spec oq` alias --- cmd/openapi/commands/openapi/query.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index c332e71..a3434aa 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -14,9 +14,8 @@ import ( ) var queryCmd = &cobra.Command{ - Use: "query [input-file]", - Aliases: []string{"oq"}, - Short: "Query an OpenAPI specification using the oq pipeline language", + Use: "query [input-file]", + Short: "Query an OpenAPI specification using the oq pipeline language", Long: `Query an OpenAPI specification using the oq pipeline language to answer structural and semantic questions about schemas, operations, parameters, responses, content types, and headers.`,