diff --git a/bundle/lsp/definition.go b/bundle/lsp/definition.go new file mode 100644 index 0000000000..56840090e2 --- /dev/null +++ b/bundle/lsp/definition.go @@ -0,0 +1,129 @@ +package lsp + +import ( + "fmt" + "regexp" + "strings" + + "github.com/databricks/cli/libs/dyn" +) + +// Interpolation regex copied from libs/dyn/dynvar/ref.go to avoid coupling LSP to dynvar internals. +var interpolationRe = regexp.MustCompile( + fmt.Sprintf(`\$\{(%s(\.%s(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`, + `[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*`, + `[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*`, + ), +) + +// InterpolationRef represents a ${...} reference found at a cursor position. +type InterpolationRef struct { + Path string // e.g., "resources.jobs.my_job.name" + Range Range // range of the full "${...}" token +} + +// FindInterpolationAtPosition finds the ${...} expression the cursor is inside. +func FindInterpolationAtPosition(lines []string, pos Position) (InterpolationRef, bool) { + if pos.Line < 0 || pos.Line >= len(lines) { + return InterpolationRef{}, false + } + + line := lines[pos.Line] + matches := interpolationRe.FindAllStringSubmatchIndex(line, -1) + for _, m := range matches { + // m[0]:m[1] is the full match "${...}" + // m[2]:m[3] is the first capture group (the path inside ${}) + start := m[0] + end := m[1] + if pos.Character >= start && pos.Character < end { + path := line[m[2]:m[3]] + return InterpolationRef{ + Path: path, + Range: Range{ + Start: Position{Line: pos.Line, Character: start}, + End: Position{Line: pos.Line, Character: end}, + }, + }, true + } + } + return InterpolationRef{}, false +} + +// ResolveDefinition resolves a path string against the merged tree and returns its source location. +func ResolveDefinition(tree dyn.Value, pathStr string) (dyn.Location, bool) { + if !tree.IsValid() { + return dyn.Location{}, false + } + + // Handle var.X shorthand: rewrite to variables.X. + if strings.HasPrefix(pathStr, "var.") { + pathStr = "variables." + strings.TrimPrefix(pathStr, "var.") + } + + p, err := dyn.NewPathFromString(pathStr) + if err != nil { + return dyn.Location{}, false + } + + v, err := dyn.GetByPath(tree, p) + if err != nil { + return dyn.Location{}, false + } + + loc := v.Location() + if loc.File == "" { + return dyn.Location{}, false + } + return loc, true +} + +// InterpolationReference records a ${...} reference found in the merged tree. +type InterpolationReference struct { + Path string // dyn path where the reference was found + Location dyn.Location // source location of the string containing the reference + RefStr string // the full "${...}" expression +} + +// FindInterpolationReferences walks the merged tree to find all ${...} string values +// whose reference path starts with the given resource path prefix. +func FindInterpolationReferences(tree dyn.Value, resourcePath string) []InterpolationReference { + if !tree.IsValid() { + return nil + } + + var refs []InterpolationReference + dyn.Walk(tree, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { //nolint:errcheck + s, ok := v.AsString() + if !ok { + return v, nil + } + + matches := interpolationRe.FindAllStringSubmatch(s, -1) + for _, m := range matches { + refPath := m[1] + if refPath == resourcePath || strings.HasPrefix(refPath, resourcePath+".") { + refs = append(refs, InterpolationReference{ + Path: p.String(), + Location: v.Location(), + RefStr: m[0], + }) + } + } + return v, nil + }) + + return refs +} + +// DynLocationToLSPLocation converts a 1-based dyn.Location to a 0-based LSPLocation. +func DynLocationToLSPLocation(loc dyn.Location) LSPLocation { + line := max(loc.Line-1, 0) + col := max(loc.Column-1, 0) + return LSPLocation{ + URI: PathToURI(loc.File), + Range: Range{ + Start: Position{Line: line, Character: col}, + End: Position{Line: line, Character: col}, + }, + } +} diff --git a/bundle/lsp/definition_test.go b/bundle/lsp/definition_test.go new file mode 100644 index 0000000000..b212307166 --- /dev/null +++ b/bundle/lsp/definition_test.go @@ -0,0 +1,142 @@ +package lsp_test + +import ( + "strings" + "testing" + + "github.com/databricks/cli/bundle/lsp" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/yamlloader" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFindInterpolationAtPositionBasic(t *testing.T) { + lines := []string{` name: "${resources.jobs.my_job.name}"`} + // Cursor inside the interpolation. + ref, ok := lsp.FindInterpolationAtPosition(lines, lsp.Position{Line: 0, Character: 15}) + require.True(t, ok) + assert.Equal(t, "resources.jobs.my_job.name", ref.Path) +} + +func TestFindInterpolationAtPositionMultiple(t *testing.T) { + lines := []string{`value: "${a.b} and ${c.d}"`} + // Cursor on the second interpolation. + ref, ok := lsp.FindInterpolationAtPosition(lines, lsp.Position{Line: 0, Character: 21}) + require.True(t, ok) + assert.Equal(t, "c.d", ref.Path) +} + +func TestFindInterpolationAtPositionOutside(t *testing.T) { + lines := []string{`value: "${a.b} plain text ${c.d}"`} + // Cursor on "plain text" between the two interpolations. + _, ok := lsp.FindInterpolationAtPosition(lines, lsp.Position{Line: 0, Character: 16}) + assert.False(t, ok) +} + +func TestFindInterpolationAtPositionAtDollar(t *testing.T) { + lines := []string{`name: "${var.foo}"`} + // Cursor on the "$" character. + idx := strings.Index(lines[0], "$") + ref, ok := lsp.FindInterpolationAtPosition(lines, lsp.Position{Line: 0, Character: idx}) + require.True(t, ok) + assert.Equal(t, "var.foo", ref.Path) +} + +func TestFindInterpolationAtPositionNone(t *testing.T) { + lines := []string{`name: "plain string"`} + _, ok := lsp.FindInterpolationAtPosition(lines, lsp.Position{Line: 0, Character: 10}) + assert.False(t, ok) +} + +func TestResolveDefinition(t *testing.T) { + yaml := ` +resources: + jobs: + my_job: + name: "ETL" +` + tree, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + loc, ok := lsp.ResolveDefinition(tree, "resources.jobs.my_job") + require.True(t, ok) + assert.Equal(t, "test.yml", loc.File) + assert.Greater(t, loc.Line, 0) +} + +func TestResolveDefinitionVarShorthand(t *testing.T) { + yaml := ` +variables: + foo: + default: "bar" +` + tree, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + loc, ok := lsp.ResolveDefinition(tree, "var.foo") + require.True(t, ok) + assert.Equal(t, "test.yml", loc.File) +} + +func TestResolveDefinitionInvalid(t *testing.T) { + yaml := ` +resources: + jobs: + my_job: + name: "ETL" +` + tree, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + _, ok := lsp.ResolveDefinition(tree, "resources.jobs.nonexistent") + assert.False(t, ok) +} + +func TestFindInterpolationReferences(t *testing.T) { + yaml := ` +resources: + jobs: + my_job: + name: "ETL" + pipelines: + my_pipeline: + name: "${resources.jobs.my_job.name}" + settings: + target: "${resources.jobs.my_job.id}" +` + tree, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + refs := lsp.FindInterpolationReferences(tree, "resources.jobs.my_job") + require.Len(t, refs, 2) + assert.Contains(t, refs[0].RefStr, "resources.jobs.my_job") + assert.Contains(t, refs[1].RefStr, "resources.jobs.my_job") +} + +func TestFindInterpolationReferencesNoMatch(t *testing.T) { + yaml := ` +resources: + jobs: + my_job: + name: "${var.name}" +` + tree, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + refs := lsp.FindInterpolationReferences(tree, "resources.jobs.my_job") + assert.Empty(t, refs) +} + +func TestDynLocationToLSPLocation(t *testing.T) { + loc := dyn.Location{ + File: "/path/to/file.yml", + Line: 5, + Column: 10, + } + + lspLoc := lsp.DynLocationToLSPLocation(loc) + assert.Equal(t, "file:///path/to/file.yml", lspLoc.URI) + assert.Equal(t, 4, lspLoc.Range.Start.Line) + assert.Equal(t, 9, lspLoc.Range.Start.Character) +} diff --git a/bundle/lsp/documents.go b/bundle/lsp/documents.go new file mode 100644 index 0000000000..d388592868 --- /dev/null +++ b/bundle/lsp/documents.go @@ -0,0 +1,105 @@ +package lsp + +import ( + "net/url" + "path/filepath" + "runtime" + "strings" + "sync" + + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/yamlloader" +) + +// Document tracks the state of an open text document. +type Document struct { + URI string + Version int + Content string + Lines []string // split by newline for position lookup + Value dyn.Value // parsed YAML (may be invalid) +} + +// DocumentStore manages open text documents. +type DocumentStore struct { + mu sync.RWMutex + docs map[string]*Document +} + +// NewDocumentStore creates an empty document store. +func NewDocumentStore() *DocumentStore { + return &DocumentStore{docs: make(map[string]*Document)} +} + +// Open registers a newly opened document. +func (s *DocumentStore) Open(uri string, version int, content string) { + doc := &Document{ + URI: uri, + Version: version, + Content: content, + Lines: strings.Split(content, "\n"), + } + doc.parse() + s.mu.Lock() + s.docs[uri] = doc + s.mu.Unlock() +} + +// Change updates the content of an already-open document. +func (s *DocumentStore) Change(uri string, version int, content string) { + s.mu.Lock() + doc, ok := s.docs[uri] + if ok { + doc.Version = version + doc.Content = content + doc.Lines = strings.Split(content, "\n") + doc.parse() + } + s.mu.Unlock() +} + +// Close removes a document from the store. +func (s *DocumentStore) Close(uri string) { + s.mu.Lock() + delete(s.docs, uri) + s.mu.Unlock() +} + +// Get returns the document for the given URI, or nil if not found. +func (s *DocumentStore) Get(uri string) *Document { + s.mu.RLock() + defer s.mu.RUnlock() + return s.docs[uri] +} + +func (doc *Document) parse() { + path := URIToPath(doc.URI) + v, err := yamlloader.LoadYAML(path, strings.NewReader(doc.Content)) + if err != nil { + doc.Value = dyn.InvalidValue + return + } + doc.Value = v +} + +// URIToPath converts a file:// URI to a filesystem path. +func URIToPath(uri string) string { + u, err := url.Parse(uri) + if err != nil { + return uri + } + p := u.Path + // On Windows, file URIs look like file:///C:/path + if runtime.GOOS == "windows" && len(p) > 0 && p[0] == '/' { + p = p[1:] + } + return p +} + +// PathToURI converts a filesystem path to a file:// URI. +func PathToURI(path string) string { + if runtime.GOOS == "windows" { + path = "/" + filepath.ToSlash(path) + } + return "file://" + path +} diff --git a/bundle/lsp/protocol.go b/bundle/lsp/protocol.go new file mode 100644 index 0000000000..3fcffb5e80 --- /dev/null +++ b/bundle/lsp/protocol.go @@ -0,0 +1,126 @@ +package lsp + +// InitializeParams holds the parameters sent by the client in the initialize request. +type InitializeParams struct { + ProcessID int `json:"processId"` + RootURI string `json:"rootUri"` + RootPath string `json:"rootPath"` +} + +// InitializeResult holds the response to the initialize request. +type InitializeResult struct { + Capabilities ServerCapabilities `json:"capabilities"` +} + +// ServerCapabilities describes the capabilities the server supports. +type ServerCapabilities struct { + TextDocumentSync *TextDocumentSyncOptions `json:"textDocumentSync,omitempty"` + HoverProvider bool `json:"hoverProvider,omitempty"` + DocumentLinkProvider *DocumentLinkOptions `json:"documentLinkProvider,omitempty"` + DefinitionProvider bool `json:"definitionProvider,omitempty"` +} + +// DefinitionParams holds the parameters for textDocument/definition. +type DefinitionParams struct { + TextDocument TextDocumentIdentifier `json:"textDocument"` + Position Position `json:"position"` +} + +// LSPLocation represents a location in a document (used for definition results). +type LSPLocation struct { + URI string `json:"uri"` + Range Range `json:"range"` +} + +// TextDocumentSyncOptions describes how text document syncing works. +type TextDocumentSyncOptions struct { + OpenClose bool `json:"openClose"` + Change int `json:"change"` // 1 = Full, 2 = Incremental +} + +// DocumentLinkOptions describes options for the document link provider. +type DocumentLinkOptions struct { + ResolveProvider bool `json:"resolveProvider"` +} + +// TextDocumentIdentifier identifies a text document by its URI. +type TextDocumentIdentifier struct { + URI string `json:"uri"` +} + +// TextDocumentItem represents an open text document. +type TextDocumentItem struct { + URI string `json:"uri"` + LanguageID string `json:"languageId"` + Version int `json:"version"` + Text string `json:"text"` +} + +// VersionedTextDocumentIdentifier identifies a specific version of a text document. +type VersionedTextDocumentIdentifier struct { + URI string `json:"uri"` + Version int `json:"version"` +} + +// TextDocumentContentChangeEvent describes a change to a text document. +type TextDocumentContentChangeEvent struct { + Text string `json:"text"` +} + +// DidOpenTextDocumentParams holds the parameters for textDocument/didOpen. +type DidOpenTextDocumentParams struct { + TextDocument TextDocumentItem `json:"textDocument"` +} + +// DidChangeTextDocumentParams holds the parameters for textDocument/didChange. +type DidChangeTextDocumentParams struct { + TextDocument VersionedTextDocumentIdentifier `json:"textDocument"` + ContentChanges []TextDocumentContentChangeEvent `json:"contentChanges"` +} + +// DidCloseTextDocumentParams holds the parameters for textDocument/didClose. +type DidCloseTextDocumentParams struct { + TextDocument TextDocumentIdentifier `json:"textDocument"` +} + +// Position represents a zero-based line and character offset. +type Position struct { + Line int `json:"line"` + Character int `json:"character"` +} + +// Range represents a span of text in a document. +type Range struct { + Start Position `json:"start"` + End Position `json:"end"` +} + +// DocumentLinkParams holds the parameters for textDocument/documentLink. +type DocumentLinkParams struct { + TextDocument TextDocumentIdentifier `json:"textDocument"` +} + +// DocumentLink represents a clickable link in a document. +type DocumentLink struct { + Range Range `json:"range"` + Target string `json:"target"` + Tooltip string `json:"tooltip,omitempty"` +} + +// HoverParams holds the parameters for textDocument/hover. +type HoverParams struct { + TextDocument TextDocumentIdentifier `json:"textDocument"` + Position Position `json:"position"` +} + +// Hover represents the result of a hover request. +type Hover struct { + Contents MarkupContent `json:"contents"` + Range *Range `json:"range,omitempty"` +} + +// MarkupContent represents marked-up text for display. +type MarkupContent struct { + Kind string `json:"kind"` // "plaintext" or "markdown" + Value string `json:"value"` +} diff --git a/bundle/lsp/resource_index.go b/bundle/lsp/resource_index.go new file mode 100644 index 0000000000..0fd703f108 --- /dev/null +++ b/bundle/lsp/resource_index.go @@ -0,0 +1,68 @@ +package lsp + +import ( + "fmt" + + "github.com/databricks/cli/libs/dyn" +) + +// ResourceEntry represents a resource definition found in YAML. +type ResourceEntry struct { + Type string // e.g., "jobs", "pipelines" + Key string // e.g., "my_etl_job" + KeyRange Range // position of the key in the YAML file + Path string // e.g., "resources.jobs.my_etl_job" +} + +// IndexResources walks a parsed YAML dyn.Value and finds all resource definitions +// under "resources..". +func IndexResources(doc *Document) []ResourceEntry { + if !doc.Value.IsValid() { + return nil + } + + resources := doc.Value.Get("resources") + if resources.Kind() != dyn.KindMap { + return nil + } + + m, ok := resources.AsMap() + if !ok { + return nil + } + + var entries []ResourceEntry + for _, typePair := range m.Pairs() { + resourceType := typePair.Key.MustString() + typeVal := typePair.Value + if typeVal.Kind() != dyn.KindMap { + continue + } + + typeMap, ok := typeVal.AsMap() + if !ok { + continue + } + + for _, resPair := range typeMap.Pairs() { + key := resPair.Key.MustString() + keyLoc := resPair.Key.Location() + + // dyn.Location uses 1-based line/column; LSP uses 0-based. + startLine := max(keyLoc.Line-1, 0) + startChar := max(keyLoc.Column-1, 0) + + entries = append(entries, ResourceEntry{ + Type: resourceType, + Key: key, + KeyRange: Range{ + Start: Position{Line: startLine, Character: startChar}, + End: Position{Line: startLine, Character: startChar + len(key)}, + }, + Path: fmt.Sprintf("resources.%s.%s", resourceType, key), + }) + } + } + + return entries +} diff --git a/bundle/lsp/server.go b/bundle/lsp/server.go new file mode 100644 index 0000000000..12338f2c2f --- /dev/null +++ b/bundle/lsp/server.go @@ -0,0 +1,430 @@ +package lsp + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/creachadair/jrpc2" + "github.com/creachadair/jrpc2/channel" + "github.com/creachadair/jrpc2/handler" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/merge" + "github.com/databricks/cli/libs/dyn/yamlloader" +) + +// TargetState holds deployment state for a single target. +type TargetState struct { + Host string + ResourceState map[string]ResourceInfo +} + +// Server is the DABs LSP server. +type Server struct { + documents *DocumentStore + bundleRoot string + target string + workspaceHost string + resourceState map[string]ResourceInfo + mergedTree dyn.Value + allTargetState map[string]TargetState +} + +// NewServer creates a new LSP server. +func NewServer() *Server { + return &Server{ + documents: NewDocumentStore(), + resourceState: make(map[string]ResourceInfo), + allTargetState: make(map[string]TargetState), + } +} + +// Run starts the LSP server on stdin/stdout. +func (s *Server) Run(ctx context.Context) error { + mux := handler.Map{ + "initialize": handler.New(s.handleInitialize), + "initialized": handler.New(s.handleInitialized), + "shutdown": handler.New(s.handleShutdown), + "textDocument/didOpen": handler.New(s.handleTextDocumentDidOpen), + "textDocument/didChange": handler.New(s.handleTextDocumentDidChange), + "textDocument/didClose": handler.New(s.handleTextDocumentDidClose), + "textDocument/documentLink": handler.New(s.handleDocumentLink), + "textDocument/hover": handler.New(s.handleHover), + "textDocument/definition": handler.New(s.handleDefinition), + } + + srv := jrpc2.NewServer(mux, &jrpc2.ServerOptions{ + AllowPush: true, + }) + ch := channel.LSP(os.Stdin, os.Stdout) + srv.Start(ch) + return srv.Wait() +} + +func (s *Server) handleInitialize(_ context.Context, params InitializeParams) (InitializeResult, error) { + if params.RootURI != "" { + s.bundleRoot = URIToPath(params.RootURI) + } else if params.RootPath != "" { + s.bundleRoot = params.RootPath + } + + s.loadBundleInfo() + + return InitializeResult{ + Capabilities: ServerCapabilities{ + TextDocumentSync: &TextDocumentSyncOptions{ + OpenClose: true, + Change: 1, // Full sync + }, + HoverProvider: true, + DocumentLinkProvider: &DocumentLinkOptions{ + ResolveProvider: false, + }, + DefinitionProvider: true, + }, + }, nil +} + +func (s *Server) handleInitialized(_ context.Context) error { + return nil +} + +func (s *Server) handleShutdown(_ context.Context) error { + return nil +} + +func (s *Server) handleTextDocumentDidOpen(_ context.Context, params DidOpenTextDocumentParams) error { + s.documents.Open(params.TextDocument.URI, params.TextDocument.Version, params.TextDocument.Text) + if s.isRootConfig(params.TextDocument.URI) { + s.loadBundleInfo() + } + return nil +} + +func (s *Server) handleTextDocumentDidChange(_ context.Context, params DidChangeTextDocumentParams) error { + if len(params.ContentChanges) > 0 { + s.documents.Change(params.TextDocument.URI, params.TextDocument.Version, params.ContentChanges[len(params.ContentChanges)-1].Text) + } + return nil +} + +func (s *Server) handleTextDocumentDidClose(_ context.Context, params DidCloseTextDocumentParams) error { + s.documents.Close(params.TextDocument.URI) + return nil +} + +func (s *Server) handleDocumentLink(_ context.Context, params DocumentLinkParams) ([]DocumentLink, error) { + doc := s.documents.Get(params.TextDocument.URI) + if doc == nil { + return nil, nil + } + + entries := IndexResources(doc) + var links []DocumentLink + for _, entry := range entries { + u := s.resolveResourceURL(entry) + if u == "" { + continue + } + links = append(links, DocumentLink{ + Range: entry.KeyRange, + Target: u, + Tooltip: fmt.Sprintf("Open %s '%s' in Databricks", entry.Type, entry.Key), + }) + } + return links, nil +} + +func (s *Server) handleHover(_ context.Context, params HoverParams) (*Hover, error) { + doc := s.documents.Get(params.TextDocument.URI) + if doc == nil { + return nil, nil + } + + entries := IndexResources(doc) + for _, entry := range entries { + if PositionInRange(params.Position, entry.KeyRange) { + content := s.buildHoverContent(entry) + return &Hover{ + Contents: MarkupContent{ + Kind: "markdown", + Value: content, + }, + Range: &entry.KeyRange, + }, nil + } + } + + return nil, nil +} + +// findRootConfig returns the path to the root bundle config file, or "" if not found. +func (s *Server) findRootConfig() string { + for _, name := range []string{"databricks.yml", "databricks.yaml", "bundle.yml", "bundle.yaml"} { + p := filepath.Join(s.bundleRoot, name) + if _, err := os.Stat(p); err == nil { + return p + } + } + return "" +} + +// loadBundleInfo reads bundle config and deployment state. +func (s *Server) loadBundleInfo() { + if s.bundleRoot == "" { + return + } + + configPath := s.findRootConfig() + if configPath == "" { + return + } + + data, err := os.ReadFile(configPath) + if err != nil { + return + } + + v, err := yamlloader.LoadYAML(configPath, strings.NewReader(string(data))) + if err != nil { + return + } + + if s.workspaceHost == "" { + s.workspaceHost = LoadWorkspaceHost(v) + } + + target := s.target + if target == "" { + target = LoadTarget(v) + s.target = target + } + + s.resourceState = LoadResourceState(s.bundleRoot, target) + + // Build URLs for resources that have IDs but no URL yet. + if s.workspaceHost != "" { + for key, info := range s.resourceState { + if info.URL == "" && info.ID != "" { + parts := strings.SplitN(key, ".", 3) + if len(parts) == 3 { + info.URL = BuildResourceURL(s.workspaceHost, parts[1], info.ID) + s.resourceState[key] = info + } + } + } + } + + s.loadMergedTree(configPath, v) + s.loadAllTargetState(v) +} + +// loadMergedTree builds a merged dyn.Value from the root config and all included files. +func (s *Server) loadMergedTree(configPath string, rootValue dyn.Value) { + s.mergedTree = rootValue + + // Extract include patterns. + includes := rootValue.Get("include") + if includes.Kind() != dyn.KindSequence { + return + } + seq, ok := includes.AsSequence() + if !ok { + return + } + + // Collect and expand glob patterns. + seen := map[string]bool{configPath: true} + var paths []string + for _, item := range seq { + pattern, ok := item.AsString() + if !ok { + continue + } + matches, err := filepath.Glob(filepath.Join(s.bundleRoot, pattern)) + if err != nil { + continue + } + for _, m := range matches { + if !seen[m] { + seen[m] = true + paths = append(paths, m) + } + } + } + sort.Strings(paths) + + // Parse and merge each included file. + merged := rootValue + for _, p := range paths { + data, err := os.ReadFile(p) + if err != nil { + continue + } + v, err := yamlloader.LoadYAML(p, strings.NewReader(string(data))) + if err != nil { + continue + } + merged, _ = merge.Merge(merged, v) + } + s.mergedTree = merged +} + +const maxTargets = 10 + +// loadAllTargetState loads resource state for all targets (up to maxTargets). +func (s *Server) loadAllTargetState(v dyn.Value) { + s.allTargetState = make(map[string]TargetState) + + targets := LoadAllTargets(v) + if len(targets) > maxTargets { + targets = targets[:maxTargets] + } + + for _, t := range targets { + host := LoadTargetWorkspaceHost(v, t) + rs := LoadResourceState(s.bundleRoot, t) + + // Build URLs for resources with IDs. + if host != "" { + for key, info := range rs { + if info.URL == "" && info.ID != "" { + parts := strings.SplitN(key, ".", 3) + if len(parts) == 3 { + info.URL = BuildResourceURL(host, parts[1], info.ID) + rs[key] = info + } + } + } + } + + s.allTargetState[t] = TargetState{ + Host: host, + ResourceState: rs, + } + } +} + +func (s *Server) resolveResourceURL(entry ResourceEntry) string { + if info, ok := s.resourceState[entry.Path]; ok { + return info.URL + } + return "" +} + +func (s *Server) handleDefinition(_ context.Context, params DefinitionParams) (any, error) { + doc := s.documents.Get(params.TextDocument.URI) + if doc == nil { + return nil, nil + } + + // Check if cursor is on a ${...} reference. + ref, ok := FindInterpolationAtPosition(doc.Lines, params.Position) + if ok { + loc, found := ResolveDefinition(s.mergedTree, ref.Path) + if !found { + return nil, nil + } + return DynLocationToLSPLocation(loc), nil + } + + // Check if cursor is on a resource key. + entries := IndexResources(doc) + for _, entry := range entries { + if PositionInRange(params.Position, entry.KeyRange) { + refs := FindInterpolationReferences(s.mergedTree, entry.Path) + if len(refs) == 0 { + return nil, nil + } + var locs []LSPLocation + for _, r := range refs { + locs = append(locs, DynLocationToLSPLocation(r.Location)) + } + return locs, nil + } + } + + return nil, nil +} + +func (s *Server) buildHoverContent(entry ResourceEntry) string { + var b strings.Builder + fmt.Fprintf(&b, "**%s** `%s`\n\n", entry.Type, entry.Key) + + // Show per-target state if available. + if len(s.allTargetState) > 0 { + hasAnyState := false + for _, ts := range s.allTargetState { + if _, ok := ts.ResourceState[entry.Path]; ok { + hasAnyState = true + break + } + } + + if hasAnyState { + b.WriteString("**Targets:**\n\n") + + // Sort target names for deterministic output. + targets := LoadAllTargets(s.mergedTree) + for _, t := range targets { + ts, ok := s.allTargetState[t] + if !ok { + continue + } + info, ok := ts.ResourceState[entry.Path] + if !ok { + continue + } + if info.URL != "" { + fmt.Fprintf(&b, "- **%s**: [Open in Databricks](%s) (ID: `%s`)\n", t, info.URL, info.ID) + } else if info.ID != "" { + fmt.Fprintf(&b, "- **%s**: ID: `%s`\n", t, info.ID) + } + } + return b.String() + } + } + + // Fall back to default target state. + info, hasState := s.resourceState[entry.Path] + if hasState && info.ID != "" { + fmt.Fprintf(&b, "**ID:** `%s`\n\n", info.ID) + } + if hasState && info.Name != "" { + fmt.Fprintf(&b, "**Name:** %s\n\n", info.Name) + } + if hasState && info.URL != "" { + fmt.Fprintf(&b, "[Open in Databricks](%s)", info.URL) + } else { + b.WriteString("_Not yet deployed. Run `databricks bundle deploy` to create this resource._") + } + + return b.String() +} + +func (s *Server) isRootConfig(uri string) bool { + base := filepath.Base(URIToPath(uri)) + return base == "databricks.yml" || base == "databricks.yaml" || base == "bundle.yml" || base == "bundle.yaml" +} + +// SetTarget sets the target for the server. +func (s *Server) SetTarget(target string) { + s.target = target +} + +// PositionInRange checks if a position is within a range (inclusive start, exclusive end). +func PositionInRange(pos Position, r Range) bool { + if pos.Line < r.Start.Line || pos.Line > r.End.Line { + return false + } + if pos.Line == r.Start.Line && pos.Character < r.Start.Character { + return false + } + if pos.Line == r.End.Line && pos.Character >= r.End.Character { + return false + } + return true +} diff --git a/bundle/lsp/server_integration_test.go b/bundle/lsp/server_integration_test.go new file mode 100644 index 0000000000..f235bc0976 --- /dev/null +++ b/bundle/lsp/server_integration_test.go @@ -0,0 +1,657 @@ +package lsp + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/creachadair/jrpc2" + "github.com/creachadair/jrpc2/channel" + "github.com/creachadair/jrpc2/handler" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const testBundleYAML = `bundle: + name: test-bundle +workspace: + host: "https://my-workspace.databricks.com" +targets: + dev: + default: true +resources: + jobs: + my_job: + name: "My Job" + pipelines: + my_pipeline: + name: "My Pipeline" +` + +func setupTestBundleDir(t *testing.T) string { + t.Helper() + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "databricks.yml"), []byte(testBundleYAML), 0o644)) + + stateDir := filepath.Join(tmpDir, ".databricks", "bundle", "dev") + require.NoError(t, os.MkdirAll(stateDir, 0o755)) + + stateJSON := `{ + "state_version": 1, + "state": { + "resources.jobs.my_job": {"__id__": "12345"}, + "resources.pipelines.my_pipeline": {"__id__": "abc-def"} + } + }` + require.NoError(t, os.WriteFile(filepath.Join(stateDir, "resources.json"), []byte(stateJSON), 0o644)) + + return tmpDir +} + +func newTestClientServer(t *testing.T, srv *Server) *jrpc2.Client { + t.Helper() + + mux := handler.Map{ + "initialize": handler.New(srv.handleInitialize), + "initialized": handler.New(srv.handleInitialized), + "shutdown": handler.New(srv.handleShutdown), + "textDocument/didOpen": handler.New(srv.handleTextDocumentDidOpen), + "textDocument/didChange": handler.New(srv.handleTextDocumentDidChange), + "textDocument/didClose": handler.New(srv.handleTextDocumentDidClose), + "textDocument/documentLink": handler.New(srv.handleDocumentLink), + "textDocument/hover": handler.New(srv.handleHover), + "textDocument/definition": handler.New(srv.handleDefinition), + } + + clientCh, serverCh := channel.Direct() + + jrpcSrv := jrpc2.NewServer(mux, nil) + jrpcSrv.Start(serverCh) + t.Cleanup(func() { jrpcSrv.Stop() }) + + cli := jrpc2.NewClient(clientCh, nil) + t.Cleanup(func() { cli.Close() }) + + return cli +} + +// initializeClient sends the initialize request and returns the result. +func initializeClient(ctx context.Context, t *testing.T, cli *jrpc2.Client, rootURI string) InitializeResult { + t.Helper() + var result InitializeResult + err := cli.CallResult(ctx, "initialize", InitializeParams{ + ProcessID: 1, + RootURI: rootURI, + }, &result) + require.NoError(t, err) + return result +} + +func TestServerHandleInitialize(t *testing.T) { + tmpDir := setupTestBundleDir(t) + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + result := initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + assert.True(t, result.Capabilities.HoverProvider) + assert.True(t, result.Capabilities.DefinitionProvider) + require.NotNil(t, result.Capabilities.DocumentLinkProvider) + require.NotNil(t, result.Capabilities.TextDocumentSync) + assert.True(t, result.Capabilities.TextDocumentSync.OpenClose) + assert.Equal(t, 1, result.Capabilities.TextDocumentSync.Change) +} + +func TestServerHandleDocumentLink(t *testing.T) { + tmpDir := setupTestBundleDir(t) + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAML, + }, + }) + require.NoError(t, err) + + var links []DocumentLink + err = cli.CallResult(ctx, "textDocument/documentLink", DocumentLinkParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + }, &links) + require.NoError(t, err) + require.Len(t, links, 2) + + assert.Contains(t, links[0].Target, "/jobs/12345") + assert.Contains(t, links[0].Tooltip, "my_job") + assert.Contains(t, links[1].Target, "/pipelines/abc-def") + assert.Contains(t, links[1].Tooltip, "my_pipeline") +} + +func TestServerHandleDocumentLinkNoState(t *testing.T) { + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "databricks.yml"), []byte(testBundleYAML), 0o644)) + + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAML, + }, + }) + require.NoError(t, err) + + var links []DocumentLink + err = cli.CallResult(ctx, "textDocument/documentLink", DocumentLinkParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + }, &links) + require.NoError(t, err) + assert.Empty(t, links) +} + +func TestServerHandleHoverOnResource(t *testing.T) { + tmpDir := setupTestBundleDir(t) + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAML, + }, + }) + require.NoError(t, err) + + // Get links to find the position of my_job. + var links []DocumentLink + err = cli.CallResult(ctx, "textDocument/documentLink", DocumentLinkParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + }, &links) + require.NoError(t, err) + require.NotEmpty(t, links) + + // Hover at the position of the first link (my_job key). + var hover Hover + err = cli.CallResult(ctx, "textDocument/hover", HoverParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + Position: links[0].Range.Start, + }, &hover) + require.NoError(t, err) + assert.Contains(t, hover.Contents.Value, "12345") + assert.Contains(t, hover.Contents.Value, "Open in Databricks") +} + +func TestServerHandleHoverOffResource(t *testing.T) { + tmpDir := setupTestBundleDir(t) + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAML, + }, + }) + require.NoError(t, err) + + // Hover at line 0, character 0 which is "bundle:" -- not a resource key. + rsp, err := cli.Call(ctx, "textDocument/hover", HoverParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + Position: Position{Line: 0, Character: 0}, + }) + require.NoError(t, err) + + // The handler returns nil for non-resource positions, which is JSON null. + var hover *Hover + err = rsp.UnmarshalResult(&hover) + require.NoError(t, err) + assert.Nil(t, hover) +} + +func TestServerEndToEnd(t *testing.T) { + tmpDir := setupTestBundleDir(t) + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + // 1. Initialize. + result := initializeClient(ctx, t, cli, PathToURI(tmpDir)) + assert.True(t, result.Capabilities.HoverProvider) + + // 2. Initialized notification. + err := cli.Notify(ctx, "initialized", nil) + require.NoError(t, err) + + // 3. Open document. + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err = cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAML, + }, + }) + require.NoError(t, err) + + // 4. Get document links. + var links []DocumentLink + err = cli.CallResult(ctx, "textDocument/documentLink", DocumentLinkParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + }, &links) + require.NoError(t, err) + require.Len(t, links, 2) + assert.Contains(t, links[0].Target, "/jobs/12345") + assert.Contains(t, links[1].Target, "/pipelines/abc-def") + + // 5. Hover on resource key. + var hover Hover + err = cli.CallResult(ctx, "textDocument/hover", HoverParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + Position: links[0].Range.Start, + }, &hover) + require.NoError(t, err) + assert.Contains(t, hover.Contents.Value, "12345") + assert.Contains(t, hover.Contents.Value, "Open in Databricks") + + // 6. Change document content (remove pipelines). + updatedYAML := `bundle: + name: test-bundle +workspace: + host: "https://my-workspace.databricks.com" +targets: + dev: + default: true +resources: + jobs: + my_job: + name: "My Job" +` + err = cli.Notify(ctx, "textDocument/didChange", DidChangeTextDocumentParams{ + TextDocument: VersionedTextDocumentIdentifier{ + URI: docURI, + Version: 2, + }, + ContentChanges: []TextDocumentContentChangeEvent{ + {Text: updatedYAML}, + }, + }) + require.NoError(t, err) + + // 7. Document links should now return only one link. + var linksAfterChange []DocumentLink + err = cli.CallResult(ctx, "textDocument/documentLink", DocumentLinkParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + }, &linksAfterChange) + require.NoError(t, err) + require.Len(t, linksAfterChange, 1) + assert.Contains(t, linksAfterChange[0].Target, "/jobs/12345") + + // 8. Close document. + err = cli.Notify(ctx, "textDocument/didClose", DidCloseTextDocumentParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + }) + require.NoError(t, err) + + // 9. Document links should return empty after close. + var linksAfterClose []DocumentLink + err = cli.CallResult(ctx, "textDocument/documentLink", DocumentLinkParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + }, &linksAfterClose) + require.NoError(t, err) + assert.Empty(t, linksAfterClose) + + // 10. Shutdown. + _, err = cli.Call(ctx, "shutdown", nil) + require.NoError(t, err) +} + +const testBundleYAMLWithInterpolation = `bundle: + name: test-bundle +workspace: + host: "https://my-workspace.databricks.com" +targets: + dev: + default: true +variables: + my_var: + default: "hello" +resources: + jobs: + my_job: + name: "${var.my_var}" + pipelines: + my_pipeline: + name: "My Pipeline" +` + +func TestServerDefinitionOnInterpolation(t *testing.T) { + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "databricks.yml"), []byte(testBundleYAMLWithInterpolation), 0o644)) + + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAMLWithInterpolation, + }, + }) + require.NoError(t, err) + + // Find the line with "${var.my_var}" and position cursor on it. + lines := strings.Split(testBundleYAMLWithInterpolation, "\n") + var targetLine int + var targetCol int + for i, line := range lines { + idx := strings.Index(line, "${var.my_var}") + if idx >= 0 { + targetLine = i + targetCol = idx + 2 // inside the "${...}" + break + } + } + + var loc LSPLocation + err = cli.CallResult(ctx, "textDocument/definition", DefinitionParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + Position: Position{Line: targetLine, Character: targetCol}, + }, &loc) + require.NoError(t, err) + assert.Contains(t, loc.URI, "databricks.yml") +} + +func TestServerDefinitionOnResourceKey(t *testing.T) { + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "databricks.yml"), []byte(testBundleYAMLWithInterpolation), 0o644)) + + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAMLWithInterpolation, + }, + }) + require.NoError(t, err) + + // Find the resource key position. + lines := strings.Split(testBundleYAMLWithInterpolation, "\n") + var myJobLine int + var myJobCol int + for i, line := range lines { + idx := strings.Index(line, "my_job:") + if idx >= 0 { + myJobLine = i + myJobCol = idx + 1 // inside "my_job" + break + } + } + + // Ctrl+click on "my_job" key should return references (${...} expressions referencing it). + // The YAML has name: "${var.my_var}" which does NOT reference my_job, so this may return empty. + rsp, err := cli.Call(ctx, "textDocument/definition", DefinitionParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + Position: Position{Line: myJobLine, Character: myJobCol}, + }) + require.NoError(t, err) + assert.NotNil(t, rsp) +} + +func TestServerDefinitionVarShorthand(t *testing.T) { + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "databricks.yml"), []byte(testBundleYAMLWithInterpolation), 0o644)) + + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAMLWithInterpolation, + }, + }) + require.NoError(t, err) + + // Find the line with "${var.my_var}" and position cursor on "var" part. + lines := strings.Split(testBundleYAMLWithInterpolation, "\n") + var targetLine int + var targetCol int + for i, line := range lines { + idx := strings.Index(line, "${var.my_var}") + if idx >= 0 { + targetLine = i + targetCol = idx + 2 // on "var" inside "${var.my_var}" + break + } + } + + var loc LSPLocation + err = cli.CallResult(ctx, "textDocument/definition", DefinitionParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + Position: Position{Line: targetLine, Character: targetCol}, + }, &loc) + require.NoError(t, err) + assert.Contains(t, loc.URI, "databricks.yml") +} + +func TestServerDefinitionNoMatch(t *testing.T) { + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "databricks.yml"), []byte(testBundleYAMLWithInterpolation), 0o644)) + + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: testBundleYAMLWithInterpolation, + }, + }) + require.NoError(t, err) + + // Cursor on line 0, character 0 ("bundle:") — not an interpolation or resource key. + rsp, err := cli.Call(ctx, "textDocument/definition", DefinitionParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + Position: Position{Line: 0, Character: 0}, + }) + require.NoError(t, err) + + var result *LSPLocation + err = rsp.UnmarshalResult(&result) + require.NoError(t, err) + assert.Nil(t, result) +} + +func TestServerDefinitionCrossFile(t *testing.T) { + mainYAML := `bundle: + name: test-bundle +include: + - "resources/*.yml" +variables: + my_var: + default: "hello" +` + resourceYAML := `resources: + jobs: + my_job: + name: "${var.my_var}" +` + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "databricks.yml"), []byte(mainYAML), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(tmpDir, "resources"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "resources", "jobs.yml"), []byte(resourceYAML), 0o644)) + + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + // Open the resource file with the interpolation. + resDocURI := PathToURI(filepath.Join(tmpDir, "resources", "jobs.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: resDocURI, + LanguageID: "yaml", + Version: 1, + Text: resourceYAML, + }, + }) + require.NoError(t, err) + + // Find "${var.my_var}" in the resource file. + lines := strings.Split(resourceYAML, "\n") + var targetLine int + var targetCol int + for i, line := range lines { + idx := strings.Index(line, "${var.my_var}") + if idx >= 0 { + targetLine = i + targetCol = idx + 2 + break + } + } + + // Definition should resolve to variables.my_var in the main config file. + var loc LSPLocation + err = cli.CallResult(ctx, "textDocument/definition", DefinitionParams{ + TextDocument: TextDocumentIdentifier{URI: resDocURI}, + Position: Position{Line: targetLine, Character: targetCol}, + }, &loc) + require.NoError(t, err) + assert.Contains(t, loc.URI, "databricks.yml") +} + +func TestServerHoverMultiTarget(t *testing.T) { + bundleYAML := `bundle: + name: test-bundle +workspace: + host: "https://default.databricks.com" +targets: + dev: + default: true + workspace: + host: "https://dev.databricks.com" + prod: + workspace: + host: "https://prod.databricks.com" +resources: + jobs: + my_job: + name: "My Job" +` + tmpDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(tmpDir, "databricks.yml"), []byte(bundleYAML), 0o644)) + + // Create state for both targets. + for _, target := range []struct { + name string + id string + }{ + {"dev", "111"}, + {"prod", "222"}, + } { + stateDir := filepath.Join(tmpDir, ".databricks", "bundle", target.name) + require.NoError(t, os.MkdirAll(stateDir, 0o755)) + stateJSON := `{"state_version": 1, "state": {"resources.jobs.my_job": {"__id__": "` + target.id + `"}}}` + require.NoError(t, os.WriteFile(filepath.Join(stateDir, "resources.json"), []byte(stateJSON), 0o644)) + } + + srv := NewServer() + cli := newTestClientServer(t, srv) + ctx := t.Context() + + initializeClient(ctx, t, cli, PathToURI(tmpDir)) + + docURI := PathToURI(filepath.Join(tmpDir, "databricks.yml")) + err := cli.Notify(ctx, "textDocument/didOpen", DidOpenTextDocumentParams{ + TextDocument: TextDocumentItem{ + URI: docURI, + LanguageID: "yaml", + Version: 1, + Text: bundleYAML, + }, + }) + require.NoError(t, err) + + // Find the position of my_job key. + lines := strings.Split(bundleYAML, "\n") + var myJobLine int + var myJobCol int + for i, line := range lines { + idx := strings.Index(line, "my_job:") + if idx >= 0 { + myJobLine = i + myJobCol = idx + 1 + break + } + } + + var hover Hover + err = cli.CallResult(ctx, "textDocument/hover", HoverParams{ + TextDocument: TextDocumentIdentifier{URI: docURI}, + Position: Position{Line: myJobLine, Character: myJobCol}, + }, &hover) + require.NoError(t, err) + assert.Contains(t, hover.Contents.Value, "dev") + assert.Contains(t, hover.Contents.Value, "prod") + assert.Contains(t, hover.Contents.Value, "111") + assert.Contains(t, hover.Contents.Value, "222") + assert.Contains(t, hover.Contents.Value, "Open in Databricks") +} diff --git a/bundle/lsp/server_test.go b/bundle/lsp/server_test.go new file mode 100644 index 0000000000..9ad209bf90 --- /dev/null +++ b/bundle/lsp/server_test.go @@ -0,0 +1,417 @@ +package lsp_test + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/databricks/cli/bundle/lsp" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/yamlloader" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestIndexResources(t *testing.T) { + yaml := ` +resources: + jobs: + my_etl_job: + name: "ETL Job" + pipelines: + data_pipeline: + name: "Pipeline" +` + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + doc := &lsp.Document{ + URI: "file:///test.yml", + Content: yaml, + Lines: strings.Split(yaml, "\n"), + Value: v, + } + + entries := lsp.IndexResources(doc) + require.Len(t, entries, 2) + + // Verify first entry (jobs.my_etl_job). + assert.Equal(t, "jobs", entries[0].Type) + assert.Equal(t, "my_etl_job", entries[0].Key) + assert.Equal(t, "resources.jobs.my_etl_job", entries[0].Path) + // Key should span the length of "my_etl_job". + assert.Equal(t, entries[0].KeyRange.Start.Character+len("my_etl_job"), entries[0].KeyRange.End.Character) + + // Verify second entry (pipelines.data_pipeline). + assert.Equal(t, "pipelines", entries[1].Type) + assert.Equal(t, "data_pipeline", entries[1].Key) + assert.Equal(t, "resources.pipelines.data_pipeline", entries[1].Path) +} + +func TestIndexResourcesInvalidYAML(t *testing.T) { + doc := &lsp.Document{ + URI: "file:///bad.yml", + Value: dyn.InvalidValue, + } + + entries := lsp.IndexResources(doc) + assert.Nil(t, entries) +} + +func TestIndexResourcesNoResources(t *testing.T) { + yaml := ` +bundle: + name: "test" +` + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + doc := &lsp.Document{ + URI: "file:///test.yml", + Value: v, + } + + entries := lsp.IndexResources(doc) + assert.Nil(t, entries) +} + +func TestPositionInRange(t *testing.T) { + tests := []struct { + name string + pos lsp.Position + r lsp.Range + expected bool + }{ + { + name: "inside range", + pos: lsp.Position{Line: 3, Character: 5}, + r: lsp.Range{Start: lsp.Position{Line: 3, Character: 4}, End: lsp.Position{Line: 3, Character: 14}}, + expected: true, + }, + { + name: "at start of range", + pos: lsp.Position{Line: 3, Character: 4}, + r: lsp.Range{Start: lsp.Position{Line: 3, Character: 4}, End: lsp.Position{Line: 3, Character: 14}}, + expected: true, + }, + { + name: "at end of range (exclusive)", + pos: lsp.Position{Line: 3, Character: 14}, + r: lsp.Range{Start: lsp.Position{Line: 3, Character: 4}, End: lsp.Position{Line: 3, Character: 14}}, + expected: false, + }, + { + name: "before range", + pos: lsp.Position{Line: 3, Character: 2}, + r: lsp.Range{Start: lsp.Position{Line: 3, Character: 4}, End: lsp.Position{Line: 3, Character: 14}}, + expected: false, + }, + { + name: "after range", + pos: lsp.Position{Line: 3, Character: 20}, + r: lsp.Range{Start: lsp.Position{Line: 3, Character: 4}, End: lsp.Position{Line: 3, Character: 14}}, + expected: false, + }, + { + name: "wrong line above", + pos: lsp.Position{Line: 2, Character: 5}, + r: lsp.Range{Start: lsp.Position{Line: 3, Character: 4}, End: lsp.Position{Line: 3, Character: 14}}, + expected: false, + }, + { + name: "wrong line below", + pos: lsp.Position{Line: 4, Character: 5}, + r: lsp.Range{Start: lsp.Position{Line: 3, Character: 4}, End: lsp.Position{Line: 3, Character: 14}}, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, lsp.PositionInRange(tt.pos, tt.r)) + }) + } +} + +func TestBuildResourceURL(t *testing.T) { + host := "https://my-workspace.databricks.com" + + tests := []struct { + resourceType string + id string + expected string + }{ + {"jobs", "123", host + "/jobs/123"}, + {"pipelines", "abc-def", host + "/pipelines/abc-def"}, + {"dashboards", "d1", host + "/dashboardsv3/d1/published"}, + {"model_serving_endpoints", "ep1", host + "/ml/endpoints/ep1"}, + {"experiments", "exp1", host + "/ml/experiments/exp1"}, + {"models", "m1", host + "/ml/models/m1"}, + {"clusters", "c1", host + "/compute/clusters/c1"}, + {"apps", "a1", host + "/apps/a1"}, + {"alerts", "al1", host + "/sql/alerts-v2/al1"}, + {"sql_warehouses", "sw1", host + "/sql/warehouses/sw1"}, + {"quality_monitors", "qm1", host + "/explore/data/qm1"}, + {"secret_scopes", "ss1", host + "/secrets/scopes/ss1"}, + {"unknown_type", "x1", host}, + } + + for _, tt := range tests { + t.Run(tt.resourceType, func(t *testing.T) { + assert.Equal(t, tt.expected, lsp.BuildResourceURL(host, tt.resourceType, tt.id)) + }) + } +} + +func TestBuildResourceURLEmptyInputs(t *testing.T) { + assert.Equal(t, "", lsp.BuildResourceURL("", "jobs", "123")) + assert.Equal(t, "", lsp.BuildResourceURL("https://host.com", "jobs", "")) +} + +func TestBuildResourceURLTrailingSlash(t *testing.T) { + assert.Equal(t, "https://host.com/jobs/123", lsp.BuildResourceURL("https://host.com/", "jobs", "123")) +} + +func TestLoadWorkspaceHost(t *testing.T) { + yaml := ` +workspace: + host: "https://my-workspace.databricks.com" +` + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + assert.Equal(t, "https://my-workspace.databricks.com", lsp.LoadWorkspaceHost(v)) +} + +func TestLoadWorkspaceHostWithInterpolation(t *testing.T) { + yaml := ` +workspace: + host: "${var.host}" +` + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + assert.Equal(t, "", lsp.LoadWorkspaceHost(v)) +} + +func TestLoadWorkspaceHostMissing(t *testing.T) { + yaml := ` +bundle: + name: "test" +` + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + assert.Equal(t, "", lsp.LoadWorkspaceHost(v)) +} + +func TestLoadTarget(t *testing.T) { + tests := []struct { + name string + yaml string + expected string + }{ + { + name: "default target marked", + yaml: ` +targets: + dev: + default: true + prod: + workspace: + host: "https://prod.databricks.com" +`, + expected: "dev", + }, + { + name: "no default returns first", + yaml: ` +targets: + staging: + workspace: + host: "https://staging.databricks.com" + prod: + workspace: + host: "https://prod.databricks.com" +`, + expected: "staging", + }, + { + name: "no targets section", + yaml: ` +bundle: + name: "test" +`, + expected: "default", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(tt.yaml)) + require.NoError(t, err) + assert.Equal(t, tt.expected, lsp.LoadTarget(v)) + }) + } +} + +func TestURIToPathRoundTrip(t *testing.T) { + // Test that PathToURI and URIToPath are inverses of each other. + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "databricks.yml") + uri := lsp.PathToURI(path) + assert.Equal(t, path, lsp.URIToPath(uri)) +} + +func TestDocumentStoreOpenGetClose(t *testing.T) { + store := lsp.NewDocumentStore() + + assert.Nil(t, store.Get("file:///test.yml")) + + store.Open("file:///test.yml", 1, "key: value") + doc := store.Get("file:///test.yml") + require.NotNil(t, doc) + assert.Equal(t, 1, doc.Version) + assert.Equal(t, "key: value", doc.Content) + assert.True(t, doc.Value.IsValid()) + + store.Close("file:///test.yml") + assert.Nil(t, store.Get("file:///test.yml")) +} + +func TestDocumentStoreChange(t *testing.T) { + store := lsp.NewDocumentStore() + + store.Open("file:///test.yml", 1, "key: value") + store.Change("file:///test.yml", 2, "key: updated") + + doc := store.Get("file:///test.yml") + require.NotNil(t, doc) + assert.Equal(t, 2, doc.Version) + assert.Equal(t, "key: updated", doc.Content) + assert.True(t, doc.Value.IsValid()) +} + +func TestDocumentStoreParseInvalidYAML(t *testing.T) { + store := lsp.NewDocumentStore() + store.Open("file:///bad.yml", 1, "{{{{invalid yaml") + doc := store.Get("file:///bad.yml") + require.NotNil(t, doc) + assert.False(t, doc.Value.IsValid()) +} + +func TestLoadResourceStateDirectEngine(t *testing.T) { + tmpDir := t.TempDir() + stateDir := filepath.Join(tmpDir, ".databricks", "bundle", "dev") + require.NoError(t, os.MkdirAll(stateDir, 0o755)) + + stateJSON := `{ + "state_version": 1, + "state": { + "resources.jobs.etl": {"__id__": "111"}, + "resources.pipelines.dlt": {"__id__": "222"} + } + }` + require.NoError(t, os.WriteFile(filepath.Join(stateDir, "resources.json"), []byte(stateJSON), 0o644)) + + result := lsp.LoadResourceState(tmpDir, "dev") + assert.Equal(t, "111", result["resources.jobs.etl"].ID) + assert.Equal(t, "222", result["resources.pipelines.dlt"].ID) +} + +func TestLoadResourceStateTerraform(t *testing.T) { + tmpDir := t.TempDir() + stateDir := filepath.Join(tmpDir, ".databricks", "bundle", "dev", "terraform") + require.NoError(t, os.MkdirAll(stateDir, 0o755)) + + tfState := `{ + "version": 4, + "resources": [ + { + "type": "databricks_job", + "name": "etl_job", + "mode": "managed", + "instances": [{"attributes": {"id": "333", "name": "ETL Job"}}] + }, + { + "type": "databricks_pipeline", + "name": "dlt_pipeline", + "mode": "managed", + "instances": [{"attributes": {"id": "444", "name": "DLT Pipeline"}}] + } + ] + }` + require.NoError(t, os.WriteFile(filepath.Join(stateDir, "terraform.tfstate"), []byte(tfState), 0o644)) + + result := lsp.LoadResourceState(tmpDir, "dev") + assert.Equal(t, "333", result["resources.jobs.etl_job"].ID) + assert.Equal(t, "ETL Job", result["resources.jobs.etl_job"].Name) + assert.Equal(t, "444", result["resources.pipelines.dlt_pipeline"].ID) + assert.Equal(t, "DLT Pipeline", result["resources.pipelines.dlt_pipeline"].Name) +} + +func TestLoadResourceStateNoState(t *testing.T) { + result := lsp.LoadResourceState("/nonexistent", "dev") + assert.Empty(t, result) +} + +func TestLoadAllTargets(t *testing.T) { + yaml := ` +targets: + dev: + default: true + staging: + workspace: + host: "https://staging.databricks.com" + prod: + workspace: + host: "https://prod.databricks.com" +` + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + targets := lsp.LoadAllTargets(v) + require.Len(t, targets, 3) + assert.Equal(t, "dev", targets[0]) + assert.Equal(t, "staging", targets[1]) + assert.Equal(t, "prod", targets[2]) +} + +func TestLoadAllTargetsNoTargets(t *testing.T) { + yaml := ` +bundle: + name: "test" +` + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + targets := lsp.LoadAllTargets(v) + assert.Nil(t, targets) +} + +func TestLoadTargetWorkspaceHost(t *testing.T) { + yaml := ` +workspace: + host: "https://default.databricks.com" +targets: + dev: + workspace: + host: "https://dev.databricks.com" + prod: + workspace: + host: "https://prod.databricks.com" + staging: {} +` + v, err := yamlloader.LoadYAML("test.yml", strings.NewReader(yaml)) + require.NoError(t, err) + + assert.Equal(t, "https://dev.databricks.com", lsp.LoadTargetWorkspaceHost(v, "dev")) + assert.Equal(t, "https://prod.databricks.com", lsp.LoadTargetWorkspaceHost(v, "prod")) + // staging has no override, falls back to root. + assert.Equal(t, "https://default.databricks.com", lsp.LoadTargetWorkspaceHost(v, "staging")) +} + +func TestPathToURI(t *testing.T) { + assert.Equal(t, "file:///home/user/file.yml", lsp.PathToURI("/home/user/file.yml")) +} diff --git a/bundle/lsp/state.go b/bundle/lsp/state.go new file mode 100644 index 0000000000..4ca4fbebb2 --- /dev/null +++ b/bundle/lsp/state.go @@ -0,0 +1,257 @@ +package lsp + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/dyn" +) + +// ResourceInfo holds deployment info for a resource. +type ResourceInfo struct { + ID string + URL string + Name string +} + +// LoadResourceState reads the local deployment state to get resource IDs. +// It tries direct engine state first, then terraform state. +// Returns a map from resource path (e.g., "resources.jobs.my_job") to ResourceInfo. +func LoadResourceState(bundleRoot, target string) map[string]ResourceInfo { + result := make(map[string]ResourceInfo) + + // Try direct engine state first. + directPath := filepath.Join(bundleRoot, ".databricks", "bundle", target, "resources.json") + if info := loadDirectState(directPath); info != nil { + for k, v := range info { + result[k] = v + } + return result + } + + // Try terraform state. + tfPath := filepath.Join(bundleRoot, ".databricks", "bundle", target, "terraform", "terraform.tfstate") + if info := loadTerraformState(tfPath); info != nil { + for k, v := range info { + result[k] = v + } + } + + return result +} + +type directState struct { + State map[string]directResourceEntry `json:"state"` +} + +type directResourceEntry struct { + ID string `json:"__id__"` +} + +func loadDirectState(path string) map[string]ResourceInfo { + data, err := os.ReadFile(path) + if err != nil { + return nil + } + + var state directState + if err := json.Unmarshal(data, &state); err != nil { + return nil + } + + result := make(map[string]ResourceInfo) + for key, entry := range state.State { + result[key] = ResourceInfo{ID: entry.ID} + } + return result +} + +type terraformState struct { + Version int `json:"version"` + Resources []terraformResource `json:"resources"` +} + +type terraformResource struct { + Type string `json:"type"` + Name string `json:"name"` + Mode string `json:"mode"` + Instances []terraformInstance `json:"instances"` +} + +type terraformInstance struct { + Attributes terraformAttributes `json:"attributes"` +} + +type terraformAttributes struct { + ID string `json:"id"` + Name string `json:"name"` +} + +// terraformToGroup maps terraform resource types to bundle resource group names. +var terraformToGroup = map[string]string{ + "databricks_job": "jobs", + "databricks_pipeline": "pipelines", + "databricks_mlflow_model": "models", + "databricks_mlflow_experiment": "experiments", + "databricks_model_serving": "model_serving_endpoints", + "databricks_registered_model": "registered_models", + "databricks_quality_monitor": "quality_monitors", + "databricks_catalog": "catalogs", + "databricks_schema": "schemas", + "databricks_volume": "volumes", + "databricks_cluster": "clusters", + "databricks_sql_dashboard": "dashboards", + "databricks_dashboard": "dashboards", + "databricks_app": "apps", + "databricks_secret_scope": "secret_scopes", + "databricks_sql_alert": "alerts", +} + +func loadTerraformState(path string) map[string]ResourceInfo { + data, err := os.ReadFile(path) + if err != nil { + return nil + } + + var state terraformState + if err := json.Unmarshal(data, &state); err != nil { + return nil + } + + if state.Version != 4 { + return nil + } + + result := make(map[string]ResourceInfo) + for _, r := range state.Resources { + if r.Mode != "managed" { + continue + } + group, ok := terraformToGroup[r.Type] + if !ok { + continue + } + if len(r.Instances) == 0 { + continue + } + + id := r.Instances[0].Attributes.ID + name := r.Instances[0].Attributes.Name + key := "resources." + group + "." + r.Name + + result[key] = ResourceInfo{ + ID: id, + Name: name, + } + } + + return result +} + +// LoadWorkspaceHost extracts the workspace host from the bundle YAML config. +func LoadWorkspaceHost(v dyn.Value) string { + host := v.Get("workspace").Get("host") + s, ok := host.AsString() + if !ok { + return "" + } + // Skip interpolation references like ${var.host}. + if strings.Contains(s, "${") { + return "" + } + return s +} + +// LoadTarget extracts the default target name from the bundle config. +func LoadTarget(v dyn.Value) string { + targets := v.Get("targets") + if targets.Kind() != dyn.KindMap { + return "default" + } + tm, ok := targets.AsMap() + if !ok { + return "default" + } + + // Look for a target with default: true. + for _, pair := range tm.Pairs() { + d := pair.Value.Get("default") + if b, ok := d.AsBool(); ok && b { + return pair.Key.MustString() + } + } + + // Return first target if none marked as default. + if tm.Len() > 0 { + return tm.Pairs()[0].Key.MustString() + } + return "default" +} + +// LoadAllTargets returns all target names from the parsed config. +func LoadAllTargets(v dyn.Value) []string { + targets := v.Get("targets") + if targets.Kind() != dyn.KindMap { + return nil + } + tm, ok := targets.AsMap() + if !ok { + return nil + } + var names []string + for _, pair := range tm.Pairs() { + names = append(names, pair.Key.MustString()) + } + return names +} + +// LoadTargetWorkspaceHost extracts workspace host from a specific target override, +// falling back to the root-level workspace host. +func LoadTargetWorkspaceHost(v dyn.Value, target string) string { + // Try target-specific override first. + host := v.Get("targets").Get(target).Get("workspace").Get("host") + if s, ok := host.AsString(); ok && !strings.Contains(s, "${") { + return s + } + // Fall back to root-level. + return LoadWorkspaceHost(v) +} + +// BuildResourceURL constructs the workspace URL for a resource. +func BuildResourceURL(host, resourceType, id string) string { + if host == "" || id == "" { + return "" + } + host = strings.TrimRight(host, "/") + + switch resourceType { + case "jobs": + return host + "/jobs/" + id + case "pipelines": + return host + "/pipelines/" + id + case "dashboards": + return host + "/dashboardsv3/" + id + "/published" + case "model_serving_endpoints": + return host + "/ml/endpoints/" + id + case "experiments": + return host + "/ml/experiments/" + id + case "models": + return host + "/ml/models/" + id + case "clusters": + return host + "/compute/clusters/" + id + case "apps": + return host + "/apps/" + id + case "alerts": + return host + "/sql/alerts-v2/" + id + case "sql_warehouses": + return host + "/sql/warehouses/" + id + case "quality_monitors": + return host + "/explore/data/" + id + case "secret_scopes": + return host + "/secrets/scopes/" + id + default: + return host + } +} diff --git a/cmd/experimental/experimental.go b/cmd/experimental/experimental.go index 7e7d376fea..9612871edb 100644 --- a/cmd/experimental/experimental.go +++ b/cmd/experimental/experimental.go @@ -21,6 +21,7 @@ development. They may change or be removed in future versions without notice.`, } cmd.AddCommand(mcp.NewMcpCmd()) + cmd.AddCommand(newBundleLspCommand()) return cmd } diff --git a/cmd/experimental/lsp.go b/cmd/experimental/lsp.go new file mode 100644 index 0000000000..b242da96cd --- /dev/null +++ b/cmd/experimental/lsp.go @@ -0,0 +1,28 @@ +package experimental + +import ( + "github.com/databricks/cli/bundle/lsp" + "github.com/databricks/cli/cmd/root" + "github.com/spf13/cobra" +) + +func newBundleLspCommand() *cobra.Command { + var target string + + cmd := &cobra.Command{ + Use: "bundle-lsp", + Short: "Start a Language Server Protocol server for bundle files", + Args: root.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + srv := lsp.NewServer() + if target != "" { + srv.SetTarget(target) + } + return srv.Run(cmd.Context()) + }, + } + + cmd.Flags().StringVar(&target, "target", "", "Bundle target to use for resource resolution") + + return cmd +} diff --git a/design/interpolation-parser.md b/design/interpolation-parser.md new file mode 100644 index 0000000000..2495949adf --- /dev/null +++ b/design/interpolation-parser.md @@ -0,0 +1,473 @@ +# Proposal: Replace Regex-Based Variable Interpolation with a Proper Parser + +## Status: Draft + +## Background + +DABs variable interpolation (`${...}` syntax) is currently implemented via regex +matching in `libs/dyn/dynvar/ref.go`: + +```go +baseVarDef = `[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*` +re = regexp.MustCompile( + fmt.Sprintf(`\$\{(%s(\.%s(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`, baseVarDef, baseVarDef), +) +``` + +This regex is used in `NewRef()` via `FindAllStringSubmatch` to extract all +`${...}` references from a string. The matched substrings are then resolved by +the pipeline in `resolve.go` (collect → resolve → replace). + +### Problems with the Regex Approach + +1. **No error reporting.** A non-match produces zero information — the user + gets no feedback about *why* `${foo.bar-}` or `${foo..bar}` is silently + ignored. Invalid references are indistinguishable from literal text. + +2. **No position information.** Errors cannot point to the character where + parsing fails. When resolution *does* fail, the error messages refer to the + matched string but not its location within the original value. + +3. **Hard to extend.** Adding new syntax (e.g., default values like + `${var.name:-default}`, or function calls like `${upper(var.name)}`) requires + modifying a regex that is already at the edge of readability. + +4. **No escape mechanism.** There is no way to produce a literal `${` in a + string. Users who need `${` in their output (e.g., shell scripts, ARM + templates) have no workaround. + +5. **Dual maintenance burden.** The regex must be kept in sync with a Python + regex in `python/databricks/bundles/core/_transform.py` — a fragile + arrangement with no automated enforcement. + +6. **Silent acceptance of ambiguous input.** The regex approach cannot + distinguish between "this string has no variable references" and "this string + has a malformed variable reference that should be reported." + +## Research: How Other Systems Parse `${...}` + +| System | Strategy | Escape | Nesting | Error Quality | +|--------|----------|--------|---------|---------------| +| Go `text/template` | State-function lexer | None | Paren depth | Line + template name | +| HCL2 (Terraform) | Ragel FSM + recursive descent | `$${` → literal `${` | Brace depth stack | Source range + suggestions | +| Python f-strings (PEP 701) | Mode-stack tokenizer | `{{` → `{` | Mode stack | Line/column | +| Rust `format!` | Iterator-based descent | `{{`/`}}` | N/A | Spans + suggestions | +| Bash | Char-by-char + depth tracking | `\$` | Full recursive | Line number | + +**Key insight from the research:** For a syntax as simple as `${path.to.var[0]}` +(no nested expressions, no function calls, no operators), a full recursive +descent parser is overkill. The right tool is a **two-mode character scanner** — +the same pattern used by Go's `text/template` lexer and HCL's scanner at their +core. This gives us proper error reporting, escape support, and extensibility +without the complexity of a full parser generator. + +## Proposed Design + +### Architecture: Two-Phase Scanner + +Replace the regex with a small, explicit scanner that operates in two modes: + +``` +Mode 1: TEXT — accumulate literal characters +Mode 2: REFERENCE — accumulate variable path characters inside ${...} +``` + +The scanner produces a flat list of tokens. No AST, no recursive descent — just +a linear scan that cleanly separates literal text from variable references. + +### Token Types + +```go +// TokenKind represents the type of a parsed token. +type TokenKind int + +const ( + TokenLiteral TokenKind = iota // Literal text (no interpolation) + TokenRef // Variable reference: content between ${ and } +) +``` + +### Core Data Structure + +```go +// Token represents a parsed segment of an interpolation string. +type Token struct { + Kind TokenKind + Value string // For Literal: the text. For Ref: the variable path (e.g., "a.b[0].c"). + Start int // Byte offset of the start of this token in the original string. + End int // Byte offset past the end of this token. +} +``` + +### Scanner Implementation + +```go +// Parse parses a string that may contain ${...} variable references. +// It returns a slice of tokens representing literal text and variable references. +// +// Escape sequences: +// - "$$" produces a literal "$" (only when followed by "{") +// +// Examples: +// - "hello" → [Literal("hello")] +// - "${a.b}" → [Ref("a.b")] +// - "pre ${a.b} post" → [Literal("pre "), Ref("a.b"), Literal(" post")] +// - "$${a.b}" → [Literal("${a.b}")] +// - "${a.b} ${c[0]}" → [Ref("a.b"), Literal(" "), Ref("c[0]")] +func Parse(s string) ([]Token, error) { + var tokens []Token + i := 0 + buf := strings.Builder{} // accumulates literal text + + flushLiteral := func(end int) { + if buf.Len() > 0 { + tokens = append(tokens, Token{ + Kind: TokenLiteral, + Value: buf.String(), + Start: end - buf.Len(), + End: end, + }) + buf.Reset() + } + } + + for i < len(s) { + if s[i] != '$' { + buf.WriteByte(s[i]) + i++ + continue + } + + // We see '$'. Look ahead. + if i+1 >= len(s) { + // Trailing '$' — treat as literal. + buf.WriteByte('$') + i++ + continue + } + + switch s[i+1] { + case '$': + // Escape: "$$" → literal "$". + buf.WriteByte('$') + i += 2 + + case '{': + // Start of variable reference. + flushLiteral(i) + refStart := i + i += 2 // skip "${" + + // Scan the variable path until we find '}'. + pathStart := i + for i < len(s) && s[i] != '}' { + i++ + } + + if i >= len(s) { + return nil, fmt.Errorf( + "unterminated variable reference at position %d", + refStart, + ) + } + + path := s[pathStart:i] + i++ // skip '}' + + if path == "" { + return nil, fmt.Errorf( + "empty variable reference at position %d", + refStart, + ) + } + + // Validate the path content. + if err := validatePath(path, refStart); err != nil { + return nil, err + } + + tokens = append(tokens, Token{ + Kind: TokenRef, + Value: path, + Start: refStart, + End: i, + }) + + default: + // '$' not followed by '$' or '{' — treat as literal. + buf.WriteByte('$') + i++ + } + } + + flushLiteral(i) + return tokens, nil +} +``` + +### Path Validation + +Rather than encoding path rules in the regex, validate path contents explicitly +after extraction. This function reuses the existing `dyn.NewPathFromString` but +adds character-level error reporting: + +```go +// validatePath checks that a variable path is well-formed. +// It wraps dyn.NewPathFromString with position-aware error messages. +func validatePath(path string, refStart int) error { + _, err := dyn.NewPathFromString(path) + if err != nil { + return fmt.Errorf( + "invalid variable reference ${%s} at position %d: %w", + path, refStart, err, + ) + } + return nil +} +``` + +We should also add validation for the character set used in path segments. The +current regex implicitly enforces `[a-zA-Z]` start and `[a-zA-Z0-9_-]` +continuation. This should move to an explicit check inside path validation: + +```go +func validatePathSegment(seg string) error { + if len(seg) == 0 { + return fmt.Errorf("empty path segment") + } + if seg[0] < 'A' || (seg[0] > 'Z' && seg[0] < 'a') || seg[0] > 'z' { + return fmt.Errorf("path segment must start with a letter, got %q", seg[0]) + } + // ... check continuation characters ... +} +``` + +### Updated Ref Type + +The `Ref` struct changes from storing raw regex match groups to storing parsed +tokens: + +```go +type Ref struct { + Value dyn.Value // Original dyn.Value. + Str string // Original string content. + Tokens []Token // Parsed tokens (literals and references). +} + +func NewRef(v dyn.Value) (Ref, bool) { + s, ok := v.AsString() + if !ok { + return Ref{}, false + } + + tokens, err := Parse(s) + if err != nil { + // Return error through a new error-aware API (see Migration section). + return Ref{}, false + } + + // Check if any token is a reference. + hasRef := false + for _, t := range tokens { + if t.Kind == TokenRef { + hasRef = true + break + } + } + if !hasRef { + return Ref{}, false + } + + return Ref{Value: v, Str: s, Tokens: tokens}, true +} +``` + +### Updated Resolution Logic + +The string interpolation in `resolveRef` simplifies from a regex-replacement +loop to a token-concatenation loop: + +```go +func (r *resolver) resolveRef(ref Ref, seen []string) (dyn.Value, error) { + deps := ref.References() + resolved := make([]dyn.Value, len(deps)) + complete := true + + // ... resolve deps (unchanged) ... + + // Pure substitution (single ref, no literals). + if ref.IsPure() && complete { + return dyn.NewValue(resolved[0].Value(), ref.Value.Locations()), nil + } + + // String interpolation: concatenate tokens. + var buf strings.Builder + refIdx := 0 + for _, tok := range ref.Tokens { + switch tok.Kind { + case TokenLiteral: + buf.WriteString(tok.Value) + case TokenRef: + if !resolved[refIdx].IsValid() { + // Skipped — write original ${...} back. + buf.WriteString("${") + buf.WriteString(tok.Value) + buf.WriteByte('}') + } else { + s, err := valueToString(resolved[refIdx]) + if err != nil { + return dyn.InvalidValue, err + } + buf.WriteString(s) + } + refIdx++ + } + } + + return dyn.NewValue(buf.String(), ref.Value.Locations()), nil +} +``` + +This is cleaner than the current approach which uses `strings.Replace` with a +count of 1 — a trick needed to avoid double-replacing when the same variable +appears multiple times. + +### Helper Methods on Ref + +```go +// IsPure returns true if the string is a single variable reference with no +// surrounding text (e.g., "${a.b}" but not "x ${a.b}" or "${a} ${b}"). +func (v Ref) IsPure() bool { + return len(v.Tokens) == 1 && v.Tokens[0].Kind == TokenRef +} + +// References returns the variable paths referenced in this string. +func (v Ref) References() []string { + var out []string + for _, t := range v.Tokens { + if t.Kind == TokenRef { + out = append(out, t.Value) + } + } + return out +} +``` + +### Escape Sequence: `$$` + +Following HCL2's precedent, `$$` before `{` produces a literal `$`. This is the +most natural escape for users already familiar with Terraform/HCL: + +| Input | Output | +|-------|--------| +| `${a.b}` | *(resolved value of a.b)* | +| `$${a.b}` | `${a.b}` (literal) | +| `$$notbrace` | `$notbrace` (literal) | +| `$notbrace` | `$notbrace` (literal) | + +This is backward compatible: `$$` is not a valid prefix today (the regex +requires `${`), so no existing config uses `$$` in a way that would change +meaning. + +## File Changes + +| File | Change | +|------|--------| +| `libs/dyn/dynvar/ref.go` | Replace regex + `Matches` with `Parse()` + `[]Token` | +| `libs/dyn/dynvar/ref_test.go` | Update tests: add parser tests, keep behavioral tests | +| `libs/dyn/dynvar/resolve.go` | Update `resolveRef` to use token concatenation | +| `libs/dyn/dynvar/resolve_test.go` | Add tests for escape sequences, error messages | +| `libs/dyn/dynvar/parse.go` | **New file**: scanner + token types | +| `libs/dyn/dynvar/parse_test.go` | **New file**: scanner unit tests | +| `python/databricks/bundles/core/_transform.py` | Update Python side to match (separate PR) | + +## Migration Strategy + +### Phase 1: Add Parser, Keep Regex + +1. Implement `Parse()` in a new `parse.go` file with full test coverage. +2. Add a `NewRefWithDiagnostics(v dyn.Value) (Ref, diag.Diagnostics)` that + uses the parser and can return warnings for malformed references. +3. Keep the existing `NewRef` as-is, calling the parser internally but falling + back to the regex for any parse errors (belt-and-suspenders). +4. Add logging when the parser and regex disagree, to catch discrepancies. + +### Phase 2: Switch Over + +1. Remove the regex fallback — `NewRef` uses only the parser. +2. Update `Ref` to store `[]Token` instead of `[][]string`. +3. Update `resolveRef` to use token concatenation. +4. Remove the `re` variable. + +### Phase 3: Add Escape Support + +1. Enable `$$` escape handling in the parser. +2. Document the escape sequence. +3. Update the Python implementation. + +## Compatibility + +- **Forward compatible:** All strings that currently contain valid `${...}` + references will parse identically. The parser accepts a strict superset of + the regex (it can also report errors for malformed references). + +- **Backward compatible escape:** `$$` before `{` is a new feature, not a + breaking change. No existing valid config contains `$${` (the regex would not + match it, and a literal `$${` in YAML has no special meaning today). + +- **Error reporting is additive:** Strings that silently failed to match the + regex will now produce actionable error messages. This is a UX improvement, + not a breaking change, though it could surface new warnings for configs that + previously "worked" by accident (e.g., a typo like `${foo.bar-}` was silently + treated as literal text). + +## Testing Plan + +1. **Parser unit tests** (`parse_test.go`): + - Valid references: single, multiple, with indices, with hyphens/underscores + - Escape sequences: `$$`, `$` at end of string, `$` before non-`{` + - Error cases: unterminated `${`, empty `${}`, invalid characters in path + - Position tracking: verify `Start`/`End` offsets are correct + +2. **Ref behavioral tests** (`ref_test.go`): + - All existing tests pass unchanged + - New tests for `IsPure()` and `References()` using token-based `Ref` + +3. **Resolution tests** (`resolve_test.go`): + - All existing tests pass unchanged + - New tests for escape sequences in interpolation + - New tests verifying improved error messages + +4. **Acceptance tests**: + - Add acceptance test with `$$` escape in `databricks.yml` + - Verify existing acceptance tests pass without output changes + +## Why Not a More Powerful Parser? + +A recursive descent parser or parser combinator would allow richer syntax (nested +expressions, function calls, filters). We deliberately avoid this because: + +1. **YAGNI.** The current `${path.to.var[0]}` syntax covers all use cases. There + are no open feature requests for computed expressions inside `${...}`. + +2. **Two implementations.** Any syntax change must be mirrored in the Python + implementation. A simple scanner is easy to port; a recursive descent parser + is not. + +3. **Terraform alignment.** DABs variable references are conceptually similar to + HCL variable references. Keeping the syntax simple avoids user confusion + about what expressions are supported. + +If we ever need richer expressions, the token-based architecture makes it easy to +add a parser layer on top of the scanner without changing the `Ref`/`Token` types +or the resolution pipeline. + +## Summary + +Replace the regex in `dynvar/ref.go` with a ~80-line character scanner that: +- Produces the same results for all valid inputs +- Reports actionable errors for invalid inputs (with byte positions) +- Supports `$$` escape for literal `${` output +- Is straightforward to read, test, and extend +- Simplifies the interpolation logic in `resolve.go` from regex-replacement to + token concatenation diff --git a/design/variable-lookup-suggestions.md b/design/variable-lookup-suggestions.md new file mode 100644 index 0000000000..8e8fa221a4 --- /dev/null +++ b/design/variable-lookup-suggestions.md @@ -0,0 +1,605 @@ +# Proposal: "Did You Mean?" Suggestions for Invalid Variable References + +## Status: Draft + +## Problem + +When a user writes a variable reference like `${bundle.git.origin_urlx}` (typo) +or `${var.my_clster_id}` (misspelling), the error message today is: + +``` +reference does not exist: ${bundle.git.origin_urlx} +``` + +That's it. No suggestions, no list of valid keys, no indication of what went +wrong. The user has to go back to the docs or mentally diff against the schema +to figure out the correct name. + +This is a common source of frustration: a single character typo in a long path +like `${resources.jobs.my_pipeline.tasks[0].task_key}` can take minutes to spot. + +Variable references are multi-level paths (e.g., `${resources.jobs.my_job.id}`). +A typo can occur in **any** component — or even in **multiple** components at +once. A good suggestion system must handle all of these cases. + +## What We Have Today + +### The Error Path + +When resolution fails, the call chain is: + +``` +resolve_variable_references.go: dynvar.Resolve(v, lookupFn) + resolve.go: r.resolveKey(dep, seen) + resolve.go: r.fn(p) // calls the lookup function + resolve_variable_references.go: m.lookupFn(normalized, path, b) + resolve_variable_references.go: dyn.GetByPath(v, path) + visit.go: m.GetByString(c.key) // FAILS HERE + return noSuchKeyError{path} +``` + +At the point of failure in `visit.go:135-137`: +```go +m := v.MustMap() +ev, ok := m.GetByString(c.key) +if !ok { + return InvalidValue, noSuchKeyError{path} +} +``` + +The map `m` contains **all sibling keys** — i.e., the valid alternatives. But +that information is discarded. The error only carries the failed `path`. + +Back in `resolve.go:200-201`, the error is rewrapped into a flat string: +```go +if dyn.IsNoSuchKeyError(err) { + err = fmt.Errorf("reference does not exist: ${%s}", key) +} +``` + +**Crucially**, `visit.go` stops at the **first** non-existent key. For a path +like `${resources.jbs.my_jb.id}` with typos in both `jbs` and `my_jb`: +1. `resources` — exists, traverse into it +2. `jbs` — **does not exist** → `NoSuchKeyError` immediately + +The traversal never reaches `my_jb`, so we can only suggest fixing `jbs`. The +user has to fix that, re-run, and discover the second typo. This round-trip +is exactly the frustration we want to avoid. + +### The Normalized Config Tree + +In `resolve_variable_references.go:203`, before resolution begins: +```go +normalized, _ := convert.Normalize(b.Config, root, convert.IncludeMissingFields) +``` + +This `normalized` tree includes: +- All struct-defined fields (from Go types), even if unset (via `IncludeMissingFields`) +- All user-defined map keys (resource names, variable names, etc.) + +This is the right source of truth for suggestions. It contains everything the +user could validly reference. + +## Design + +### Approach: Fuzzy Path Walk Against the Config Tree + +Rather than relying on the error from `visit.go` (which only tells us about the +first failing component), we do a **separate fuzzy walk** of the config tree when +a lookup fails. This walk processes every component in the reference path and +can fix typos in **multiple** components simultaneously. + +The flow: +1. Lookup fails with `NoSuchKeyError` +2. We walk the reference path component by component against the normalized tree +3. At each component, if the exact key exists, we follow it +4. If not, we fuzzy-match against sibling keys and follow the best match +5. If all components are resolved (some via fuzzy matching), we suggest the + corrected full path +6. If any component can't be fuzzy-matched (too far from all candidates), we + give up on the suggestion + +### Implementation + +#### 1. Levenshtein Distance Utility + +```go +// File: libs/dyn/dynvar/suggest.go + +package dynvar + +// levenshtein computes the edit distance between two strings. +func levenshtein(a, b string) int { + if len(a) == 0 { + return len(b) + } + if len(b) == 0 { + return len(a) + } + + // Use single-row DP to save memory. + prev := make([]int, len(b)+1) + for j := range prev { + prev[j] = j + } + + for i := range len(a) { + curr := make([]int, len(b)+1) + curr[0] = i + 1 + for j := range len(b) { + cost := 1 + if a[i] == b[j] { + cost = 0 + } + curr[j+1] = min( + curr[j]+1, // insertion + prev[j+1]+1, // deletion + prev[j]+cost, // substitution + ) + } + prev = curr + } + + return prev[len(b)] +} +``` + +#### 2. Single-Key Match Function + +```go +// closestKeyMatch finds the closest matching key from a list of candidates. +// Returns the best match and its edit distance. +// Returns ("", -1) if no candidate is within the distance threshold. +func closestKeyMatch(key string, candidates []string) (string, int) { + // Threshold: allow up to 3 edits, but no more than half the key length. + // This avoids suggesting wildly different strings for short keys. + maxDist := min(3, max(1, len(key)/2)) + + bestMatch := "" + bestDist := maxDist + 1 + + for _, c := range candidates { + d := levenshtein(key, c) + if d < bestDist { + bestDist = d + bestMatch = c + } + } + + if bestMatch == "" { + return "", -1 + } + return bestMatch, bestDist +} +``` + +#### 3. Fuzzy Path Walk + +This is the core new function. It walks the reference path against the config +tree, fuzzy-matching at each level: + +```go +// suggestPath walks the reference path against root and tries to find the +// closest valid path. At each component, it first tries an exact match; if +// that fails, it fuzzy-matches against available keys. +// +// Returns the suggested path as a string, or "" if no reasonable suggestion +// can be made. +func suggestPath(root dyn.Value, refPath dyn.Path) string { + current := root + suggested := make(dyn.Path, 0, len(refPath)) + + for _, component := range refPath { + if component.IsIndex() { + // For index components (e.g., [0]), we can't fuzzy-match. + // Just check if the index is valid and pass through. + s, ok := current.AsSequence() + if !ok || component.Index() >= len(s) { + return "" + } + suggested = append(suggested, component) + current = s[component.Index()] + continue + } + + key := component.Key() + m, ok := current.AsMap() + if !ok { + // Expected a map but got something else — can't suggest. + return "" + } + + // Try exact match first. + if v, exists := m.GetByString(key); exists { + suggested = append(suggested, component) + current = v + continue + } + + // Exact match failed — try fuzzy match. + candidates := m.StringKeys() + match, _ := closestKeyMatch(key, candidates) + if match == "" { + // No close match — can't suggest beyond this point. + return "" + } + + suggested = append(suggested, dyn.Key(match)) + v, _ := m.GetByString(match) + current = v + } + + return suggested.String() +} +``` + +**Key properties:** +- Handles typos at **any** level: first, middle, last, or multiple levels +- Index components (`[0]`) are passed through verbatim — no fuzzy matching +- Stops suggesting as soon as any component can't be matched (no partial guesses) +- Each component is matched independently, so two typos in different components + are both corrected + +#### 4. Wire It Into Resolution + +The suggestion logic needs access to the normalized config tree that the lookup +function uses. Today, the `Lookup` function type is: + +```go +type Lookup func(path dyn.Path) (dyn.Value, error) +``` + +The `resolve.go` resolver doesn't have direct access to the underlying tree — +it only has the lookup function. We add the suggestion logic at the layer above, +in `resolve_variable_references.go`, which has access to the `normalized` tree. + +**Option A: Pass a suggest function into the resolver** + +Add an optional suggest callback to the resolver: + +```go +// SuggestFn takes a failed reference path and returns a suggested correction, +// or "" if no suggestion can be made. +type SuggestFn func(path dyn.Path) string + +func Resolve(in dyn.Value, fn Lookup, opts ...ResolveOption) (out dyn.Value, err error) { + r := resolver{in: in, fn: fn} + for _, opt := range opts { + opt(&r) + } + return r.run() +} + +type ResolveOption func(*resolver) + +func WithSuggestFn(fn SuggestFn) ResolveOption { + return func(r *resolver) { + r.suggestFn = fn + } +} +``` + +Then in `resolveKey`: +```go +v, err := r.fn(p) +if err != nil { + if dyn.IsNoSuchKeyError(err) { + msg := fmt.Sprintf("reference does not exist: ${%s}", key) + + if r.suggestFn != nil { + if suggestion := r.suggestFn(p); suggestion != "" { + msg += fmt.Sprintf("; did you mean ${%s}?", suggestion) + } + } + + err = fmt.Errorf(msg) + } + + r.lookups[key] = lookupResult{v: dyn.InvalidValue, err: err} + return dyn.InvalidValue, err +} +``` + +And in `resolve_variable_references.go`, pass the suggest function: + +```go +return dynvar.Resolve(v, lookupFn, dynvar.WithSuggestFn(func(p dyn.Path) string { + return dynvar.SuggestPath(normalized, p) +})) +``` + +**Option B: Suggest at the `resolve_variable_references.go` level** + +Instead of modifying `Resolve`'s signature, wrap the error after `Resolve` +returns. This is simpler but less clean: + +```go +out, err := dynvar.Resolve(v, lookupFn) +if err != nil && dyn.IsNoSuchKeyError(err) { + // Extract the failed path and suggest... +} +``` + +The problem with Option B is that by the time `Resolve` returns, the original +`dyn.Path` is lost — it's been formatted into the error string. We'd have to +re-parse it or change the error type. **Option A is cleaner.** + +### Example Error Messages + +| Reference | Typos | Error After | +|-----------|-------|-------------| +| `${bundle.git.origin_urlx}` | 1 (leaf) | `did you mean ${bundle.git.origin_url}?` | +| `${resources.jbs.my_job.id}` | 1 (middle) | `did you mean ${resources.jobs.my_job.id}?` | +| `${resources.jbs.my_jb.id}` | 2 (middle + middle) | `did you mean ${resources.jobs.my_job.id}?` | +| `${bundel.git.origin_urlx}` | 2 (root + leaf) | `did you mean ${bundle.git.origin_url}?` | +| `${workspace.root_paht}` | 1 (leaf) | `did you mean ${workspace.root_path}?` | +| `${var.my_clster_id}` | 1 (leaf) | `did you mean ${var.my_cluster_id}?` | +| `${completely.wrong.path}` | all | *(no suggestion — too far at first component)* | +| `${resources.jobs.my_jb.idd}` | 2 (deep) | `did you mean ${resources.jobs.my_job.id}?` | + +### Walk-Through: Multi-Level Typo + +For `${resources.jbs.my_jb.id}`, the fuzzy walk proceeds: + +``` +Component Tree at this level Exact? Fuzzy match +───────── ────────────────── ────── ─────────── +resources {bundle, resources, ...} yes — +jbs {jobs, pipelines, ...} no "jobs" (dist=1) +my_jb {my_job, other_job, ...} no "my_job" (dist=2) +id {id, name, ...} yes — + +Suggested path: resources.jobs.my_job.id +``` + +All four components resolved, so we suggest `${resources.jobs.my_job.id}`. + +### Walk-Through: Unfixable Path + +For `${zzz.yyy.xxx}`: + +``` +Component Tree at this level Exact? Fuzzy match +───────── ────────────────── ────── ─────────── +zzz {bundle, resources, ...} no none (all dist>3) + +Suggested path: "" (give up) +``` + +No suggestion produced. + +## Scope + +### What This Covers + +- Typos in struct field names: `${bundle.git.origin_urlx}` (keys from Go types) +- Typos in user-defined names: `${var.my_clster_id}` (keys from user config) +- Typos in resource type names: `${resources.jbs.my_job.id}` +- Typos in resource instance names: `${resources.jobs.my_jb.id}` +- **Multi-level typos**: `${resources.jbs.my_jb.id}` (typos at two levels) + +### What This Does NOT Cover + +- **Invalid path structure** (e.g., `${a..b}` or `${a[x]}`) — this is a parse + error, not a lookup error, and would be handled by the parser proposal. +- **References to the wrong section** (e.g., user writes `${bundle.cluster_id}` + when they mean `${var.cluster_id}`) — the prefix is valid so we'd only + suggest keys within `bundle.*`. Cross-section suggestions would require + searching the entire tree, which is a separate feature. +- **Array index out of bounds** (e.g., `${resources.jobs.foo.tasks[99]}`) — this + is an `indexOutOfBoundsError`, not a `noSuchKeyError`. No suggestions apply. + +## `var` Shorthand + +The `${var.foo}` shorthand is rewritten to `${variables.foo.value}` before +lookup (in `resolve_variable_references.go:209-222`). The suggestion function +receives the **rewritten** path. If we suggest a corrected path, we should +convert it back to the shorthand form for the user-facing message. + +For example: +- User writes: `${var.my_clster_id}` +- Rewritten to: `${variables.my_clster_id.value}` +- Suggestion from fuzzy walk: `variables.my_cluster_id.value` +- User-facing message: `did you mean ${var.my_cluster_id}?` + +This reverse mapping is straightforward: if the suggested path starts with +`variables.` and ends with `.value`, strip those and prefix with `var.`. + +## File Changes + +| File | Change | +|------|--------| +| `libs/dyn/mapping.go` | Add `StringKeys()` helper | +| `libs/dyn/dynvar/suggest.go` | **New**: `levenshtein()`, `closestKeyMatch()`, `SuggestPath()` | +| `libs/dyn/dynvar/suggest_test.go` | **New**: tests for distance, matching, and path suggestion | +| `libs/dyn/dynvar/resolve.go` | Add `SuggestFn` field, use it in `resolveKey` | +| `libs/dyn/dynvar/resolve_test.go` | Add tests for suggestion error messages | +| `bundle/config/mutator/resolve_variable_references.go` | Pass `WithSuggestFn` to `Resolve` | + +Note: no changes to `libs/dyn/visit.go` — the suggestion logic is entirely +separate from the traversal error path. + +## Testing + +### Unit Tests for Levenshtein + Suggestions + +```go +func TestLevenshtein(t *testing.T) { + assert.Equal(t, 0, levenshtein("abc", "abc")) + assert.Equal(t, 1, levenshtein("abc", "ab")) // deletion + assert.Equal(t, 1, levenshtein("abc", "abcd")) // insertion + assert.Equal(t, 1, levenshtein("abc", "adc")) // substitution + assert.Equal(t, 3, levenshtein("abc", "xyz")) // all different + assert.Equal(t, 3, levenshtein("", "abc")) // empty vs non-empty +} + +func TestClosestKeyMatch(t *testing.T) { + candidates := []string{"origin_url", "branch", "commit"} + + match, dist := closestKeyMatch("origin_urlx", candidates) + assert.Equal(t, "origin_url", match) + assert.Equal(t, 1, dist) + + match, _ = closestKeyMatch("zzzzzzz", candidates) + assert.Equal(t, "", match) +} +``` + +### Fuzzy Path Walk Tests + +```go +func TestSuggestPathSingleTypo(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "bundle": dyn.V(map[string]dyn.Value{ + "git": dyn.V(map[string]dyn.Value{ + "origin_url": dyn.V(""), + "branch": dyn.V(""), + }), + }), + }) + + p := dyn.MustPathFromString("bundle.git.origin_urlx") + assert.Equal(t, "bundle.git.origin_url", SuggestPath(root, p)) +} + +func TestSuggestPathMultiLevelTypo(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "resources": dyn.V(map[string]dyn.Value{ + "jobs": dyn.V(map[string]dyn.Value{ + "my_job": dyn.V(map[string]dyn.Value{ + "id": dyn.V(""), + }), + }), + }), + }) + + p := dyn.MustPathFromString("resources.jbs.my_jb.id") + assert.Equal(t, "resources.jobs.my_job.id", SuggestPath(root, p)) +} + +func TestSuggestPathNoMatch(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "bundle": dyn.V(map[string]dyn.Value{ + "name": dyn.V(""), + }), + }) + + p := dyn.MustPathFromString("zzzzz.yyyyy") + assert.Equal(t, "", SuggestPath(root, p)) +} + +func TestSuggestPathWithIndex(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "resources": dyn.V(map[string]dyn.Value{ + "jobs": dyn.V(map[string]dyn.Value{ + "my_job": dyn.V(map[string]dyn.Value{ + "tasks": dyn.V([]dyn.Value{ + dyn.V(map[string]dyn.Value{ + "task_key": dyn.V(""), + }), + }), + }), + }), + }), + }) + + p := dyn.MustPathFromString("resources.jobs.my_job.tasks[0].tsk_key") + assert.Equal(t, "resources.jobs.my_job.tasks[0].task_key", SuggestPath(root, p)) +} +``` + +### Integration-Level Tests + +```go +func TestResolveNotFoundWithSuggestion(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "bundle": dyn.V(map[string]dyn.Value{ + "name": dyn.V("my-bundle"), + "target": dyn.V("dev"), + }), + "ref": dyn.V("${bundle.nme}"), + }) + + _, err := dynvar.Resolve(in, dynvar.DefaultLookup(in), + dynvar.WithSuggestFn(func(p dyn.Path) string { + return dynvar.SuggestPath(in, p) + }), + ) + assert.ErrorContains(t, err, "reference does not exist: ${bundle.nme}") + assert.ErrorContains(t, err, "did you mean ${bundle.name}?") +} + +func TestResolveNotFoundMultiLevelTypo(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "resources": dyn.V(map[string]dyn.Value{ + "jobs": dyn.V(map[string]dyn.Value{ + "my_job": dyn.V(map[string]dyn.Value{ + "id": dyn.V("123"), + }), + }), + }), + "ref": dyn.V("${resources.jbs.my_jb.id}"), + }) + + _, err := dynvar.Resolve(in, dynvar.DefaultLookup(in), + dynvar.WithSuggestFn(func(p dyn.Path) string { + return dynvar.SuggestPath(in, p) + }), + ) + assert.ErrorContains(t, err, "did you mean ${resources.jobs.my_job.id}?") +} +``` + +## Alternatives Considered + +### A. Fix one component at a time (original proposal) + +Only suggest a fix for the first failing component. After the user fixes that, +they re-run and discover the next typo. + +**Rejected** because: +- Requires multiple round-trips for multi-level typos +- The fuzzy walk approach is barely more complex but gives a much better UX + +### B. Enumerate all valid paths in the error + +List all valid sibling keys: + +``` +reference does not exist: ${bundle.nme}; valid keys at "bundle" are: name, target, git +``` + +**Rejected** because for large maps (e.g., `resources.jobs` with dozens of jobs) +this would produce very noisy output. A single close match is more actionable. + +### C. Search the entire tree for the closest leaf path + +Walk the entire normalized tree and compute edit distance for every possible +leaf path against the full reference string. + +**Rejected** because: +- Expensive for large configs (every leaf × string distance) +- Could suggest paths in completely unrelated sections +- The per-component walk is more predictable and faster (bounded by path depth) + +### D. Do nothing — rely on docs/IDE support + +**Rejected** because: +- Many users don't use an IDE for YAML editing +- The error happens at `databricks bundle validate` time, which is the right + place for actionable feedback +- This is low-effort, high-value + +## Relationship to Parser Proposal + +This proposal is **independent** of the regex-to-parser migration. It can be +implemented with the current regex-based `NewRef` — the suggestion logic operates +at the resolution level, not the parsing level. + +However, the two proposals complement each other: +- The parser proposal improves error reporting for **malformed** references + (e.g., `${a..b}`, unterminated `${`) +- This proposal improves error reporting for **well-formed but invalid** + references (e.g., `${bundle.nme}`) + +Both can be implemented and shipped independently. diff --git a/go.mod b/go.mod index 08c9d4efab..599e02e67b 100644 --- a/go.mod +++ b/go.mod @@ -51,6 +51,8 @@ require gopkg.in/yaml.v3 v3.0.1 // indirect // Dependencies for experimental SSH commands require github.com/tailscale/hujson v0.0.0-20250605163823-992244df8c5a // BSD-3-Clause +require github.com/creachadair/jrpc2 v1.3.5 + require ( cloud.google.com/go/auth v0.18.1 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect @@ -71,6 +73,7 @@ require ( github.com/clipperhouse/stringish v0.1.1 // indirect github.com/clipperhouse/uax29/v2 v2.5.0 // indirect github.com/cloudflare/circl v1.6.3 // indirect + github.com/creachadair/mds v0.26.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect diff --git a/go.sum b/go.sum index ada133ffd8..7cef064a64 100644 --- a/go.sum +++ b/go.sum @@ -71,6 +71,10 @@ github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsV github.com/cloudflare/circl v1.6.3 h1:9GPOhQGF9MCYUeXyMYlqTR6a5gTrgR/fBLXvUgtVcg8= github.com/cloudflare/circl v1.6.3/go.mod h1:2eXP6Qfat4O/Yhh8BznvKnJ+uzEoTQ6jVKJRn81BiS4= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/creachadair/jrpc2 v1.3.5 h1:onJko+1u6xoiRph3xwWmfNISR91teCRhbJwSyS9Svzo= +github.com/creachadair/jrpc2 v1.3.5/go.mod h1:YXDmS53AavsiytbAwskrczJPcVHvKC9GoyWzwfSQXoE= +github.com/creachadair/mds v0.26.1 h1:CQG8f4cueHX/c20q5Sy/Ubk8Bvy+aRzVgbpxVieMBAs= +github.com/creachadair/mds v0.26.1/go.mod h1:dMBTCSy3iS3dwh4Rb1zxeZz2d7K8+N24GCTsayWtQRI= github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=