diff --git a/acceptance/bundle/user_agent/output.txt b/acceptance/bundle/user_agent/output.txt index bf128624271..5cda322d3d8 100644 --- a/acceptance/bundle/user_agent/output.txt +++ b/acceptance/bundle/user_agent/output.txt @@ -8,6 +8,7 @@ OK deploy.direct /api/2.0/workspace/get-status engine/direct OK deploy.direct /api/2.0/workspace/get-status engine/direct OK deploy.direct /api/2.0/workspace/get-status engine/direct OK deploy.direct /api/2.0/workspace/get-status engine/direct +OK deploy.direct /api/2.0/workspace/list-repo engine/direct OK deploy.direct /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/empty.py engine/direct OK deploy.direct /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deploy.lock engine/direct OK deploy.direct /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deployment.json engine/direct @@ -28,6 +29,7 @@ OK deploy.terraform /api/2.0/workspace/get-status engine/terraform OK deploy.terraform /api/2.0/workspace/get-status engine/terraform OK deploy.terraform /api/2.0/workspace/get-status engine/terraform OK deploy.terraform /api/2.0/workspace/get-status engine/terraform +OK deploy.terraform /api/2.0/workspace/list-repo engine/terraform OK deploy.terraform /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/empty.py engine/terraform OK deploy.terraform /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deploy.lock engine/terraform OK deploy.terraform /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deployment.json engine/terraform diff --git a/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json b/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json index cc39aad6e9b..a84d67497c6 100644 --- 
// RemoteFileMetadata describes a single workspace object returned by the
// list-repo API with return_wsfs_metadata=true.
type RemoteFileMetadata struct {
	// Absolute workspace path. Note that for notebooks, the extension is
	// stripped (e.g. /Workspace/foo/bar.py -> /Workspace/foo/bar).
	Path string

	// SHA-256 hex digest of the blob. Populated only for FILE and NOTEBOOK
	// objects (not directories) when the workspace returns wsfs metadata.
	ContentSHA256Hex string

	// "FILE", "NOTEBOOK", or "DIRECTORY".
	ObjectType string
}

// ListWithSHAs recursively lists all workspace objects under the given path
// and returns their content SHAs from the workspace's wsfs metadata. This uses
// /api/2.0/workspace/list-repo with the (currently undocumented in the SDK)
// return_wsfs_metadata=true flag, which causes the response to include a
// content_sha256_hex field for files and notebooks.
//
// Returns nil if the path does not exist; callers should treat that as "no
// remote state to merge" rather than an error.
func (w *WorkspaceFilesClient) ListWithSHAs(ctx context.Context, dirPath string) ([]RemoteFileMetadata, error) {
	// The SDK has no generated types for this endpoint, so decode into
	// throwaway structs scoped to this function.
	type listObject struct {
		ObjectType       string `json:"object_type"`
		Path             string `json:"path"`
		ContentSHA256Hex string `json:"content_sha256_hex"`
		HasWsfsMetadata  bool   `json:"has_wsfs_metadata"`
	}
	type listResponse struct {
		Objects []listObject `json:"objects"`
	}

	var resp listResponse
	// For a GET request, the payload map is sent as query parameters (the
	// recorded acceptance-test requests show them under "q").
	err := w.apiClient.Do(
		ctx,
		http.MethodGet,
		"/api/2.0/workspace/list-repo",
		w.orgIDHeaders(),
		nil,
		map[string]any{
			"path":                 dirPath,
			"return_wsfs_metadata": true,
		},
		&resp,
	)
	if err != nil {
		// A missing directory is not an error for our callers: map a 404 to
		// an empty (nil) result.
		var aerr *apierr.APIError
		if errors.As(err, &aerr) && aerr.StatusCode == http.StatusNotFound {
			return nil, nil
		}
		return nil, err
	}

	// Flatten into the exported metadata type. HasWsfsMetadata is decoded
	// but intentionally not surfaced to callers.
	out := make([]RemoteFileMetadata, 0, len(resp.Objects))
	for _, o := range resp.Objects {
		out = append(out, RemoteFileMetadata{
			Path:             o.Path,
			ContentSHA256Hex: o.ContentSHA256Hex,
			ObjectType:       o.ObjectType,
		})
	}
	return out, nil
}
// Open returns a reader for the file contents. The caller is responsible
// for closing it. The path is resolved relative to the fileset's root.
func (f File) Open() (fs.File, error) {
	return f.root.Open(f.Relative)
}
+type RemoteFilter struct { + lister shaLister + remotePath string +} + +func NewRemoteFilter(lister shaLister, remotePath string) *RemoteFilter { + return &RemoteFilter{lister: lister, remotePath: remotePath} +} + +// Apply returns a copy of d with put entries removed for files whose local +// SHA already matches the remote SHA. Errors fetching or computing SHAs are +// logged and treated as "do not skip" — the worst case is an unnecessary +// upload, which is the existing behavior. +func (f *RemoteFilter) Apply(ctx context.Context, d diff, files []fileset.File, localToRemote map[string]string) diff { + if len(d.put) == 0 || f == nil || f.lister == nil { + return d + } + + remote, err := f.lister.ListWithSHAs(ctx, f.remotePath) + if err != nil { + log.Warnf(ctx, "could not fetch remote content SHAs from %s; uploading all candidate files: %s", f.remotePath, err) + return d + } + if len(remote) == 0 { + return d + } + + remoteSHAByPath := make(map[string]string, len(remote)) + for _, e := range remote { + if e.ContentSHA256Hex == "" { + continue + } + remoteSHAByPath[e.Path] = e.ContentSHA256Hex + } + + localByRelative := make(map[string]*fileset.File, len(files)) + for i := range files { + localByRelative[files[i].Relative] = &files[i] + } + + keep := make([]string, 0, len(d.put)) + skipped := 0 + for _, p := range d.put { + if !f.canSkip(ctx, p, localByRelative, localToRemote, remoteSHAByPath) { + keep = append(keep, p) + continue + } + skipped++ + } + + if skipped > 0 { + log.Debugf(ctx, "remote-filter: skipped %d/%d uploads matching workspace SHAs", skipped, len(d.put)) + } + + return diff{ + delete: d.delete, + rmdir: d.rmdir, + mkdir: d.mkdir, + put: keep, + } +} + +// canSkip reports whether the put for relativePath can be safely dropped: +// the workspace already has a file at the corresponding remote path with the +// same SHA-256 as the local file. 
+func (f *RemoteFilter) canSkip( + ctx context.Context, + relativePath string, + localByRelative map[string]*fileset.File, + localToRemote map[string]string, + remoteSHAByPath map[string]string, +) bool { + local, ok := localByRelative[relativePath] + if !ok { + return false + } + remoteName, ok := localToRemote[relativePath] + if !ok { + return false + } + remoteSHA, ok := remoteSHAByPath[path.Join(f.remotePath, remoteName)] + if !ok { + return false + } + localSHA, err := computeFileSHA(local) + if err != nil { + log.Debugf(ctx, "remote-filter: hashing %s failed; will upload: %s", relativePath, err) + return false + } + return localSHA == remoteSHA +} + +func computeFileSHA(f *fileset.File) (string, error) { + rc, err := f.Open() + if err != nil { + return "", err + } + defer rc.Close() + h := sha256.New() + if _, err := io.Copy(h, rc); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} diff --git a/libs/sync/remote_filter_test.go b/libs/sync/remote_filter_test.go new file mode 100644 index 00000000000..33e74aecf5b --- /dev/null +++ b/libs/sync/remote_filter_test.go @@ -0,0 +1,169 @@ +package sync + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "path" + "testing" + + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type stubLister struct { + out []filer.RemoteFileMetadata + err error +} + +func (s *stubLister) ListWithSHAs(ctx context.Context, dirPath string) ([]filer.RemoteFileMetadata, error) { + return s.out, s.err +} + +func sha256OfFile(t *testing.T, root vfs.Path, relative string) string { + t.Helper() + r, err := root.Open(relative) + require.NoError(t, err) + defer r.Close() + h := sha256.New() + _, err = h.Write(mustReadAll(t, r)) + require.NoError(t, err) + return hex.EncodeToString(h.Sum(nil)) +} + +func mustReadAll(t *testing.T, r interface{ 
Read(p []byte) (int, error) }) []byte { + t.Helper() + buf := make([]byte, 0, 1024) + tmp := make([]byte, 256) + for { + n, err := r.Read(tmp) + if n > 0 { + buf = append(buf, tmp[:n]...) + } + if err != nil { + break + } + } + return buf +} + +const remoteRoot = "/Workspace/Users/foo@databricks.com/proj" + +// loadTestFiles returns the test fileset and the mtime-based SnapshotState +// (which provides LocalToRemoteNames). All tests share the same on-disk +// fileset under testdata/sync-fileset. +func loadTestFiles(t *testing.T) ([]fileset.File, map[string]string) { + t.Helper() + fs := fileset.New(vfs.MustNew("./testdata/sync-fileset")) + files, err := fs.Files() + require.NoError(t, err) + state, err := NewSnapshotState(files) + require.NoError(t, err) + return files, state.LocalToRemoteNames +} + +func TestRemoteFilterReturnsUnchangedWhenNoPuts(t *testing.T) { + rf := NewRemoteFilter(&stubLister{}, remoteRoot) + d := diff{delete: []string{"old"}, rmdir: []string{"olddir"}} + got := rf.Apply(t.Context(), d, nil, nil) + assert.Equal(t, d.delete, got.delete) + assert.Equal(t, d.rmdir, got.rmdir) + assert.Empty(t, got.put) +} + +func TestRemoteFilterReturnsUnchangedWhenListErrors(t *testing.T) { + files, ltr := loadTestFiles(t) + rf := NewRemoteFilter(&stubLister{err: errors.New("boom")}, remoteRoot) + in := diff{put: []string{"my-script.py"}} + got := rf.Apply(t.Context(), in, files, ltr) + assert.Equal(t, []string{"my-script.py"}, got.put) +} + +func TestRemoteFilterReturnsUnchangedWhenRemoteEmpty(t *testing.T) { + files, ltr := loadTestFiles(t) + rf := NewRemoteFilter(&stubLister{out: nil}, remoteRoot) + in := diff{put: []string{"my-script.py"}} + got := rf.Apply(t.Context(), in, files, ltr) + assert.Equal(t, []string{"my-script.py"}, got.put) +} + +func TestRemoteFilterDropsPutWhenSHAMatches(t *testing.T) { + files, ltr := loadTestFiles(t) + root := vfs.MustNew("./testdata/sync-fileset") + wantSHA := sha256OfFile(t, root, "my-script.py") + + rf := 
// A remote entry exists for the file but with different content: the put
// must be kept so the stale remote copy is overwritten.
func TestRemoteFilterKeepsPutWhenSHAMismatches(t *testing.T) {
	files, ltr := loadTestFiles(t)
	rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{
		{Path: path.Join(remoteRoot, "my-script.py"), ContentSHA256Hex: "deadbeef", ObjectType: "FILE"},
	}}, remoteRoot)

	in := diff{put: []string{"my-script.py"}}
	got := rf.Apply(t.Context(), in, files, ltr)
	assert.Equal(t, []string{"my-script.py"}, got.put)
}

// The remote listing has no entry for the candidate file at all: the put
// must be kept.
func TestRemoteFilterKeepsPutWhenRemoteMissing(t *testing.T) {
	files, ltr := loadTestFiles(t)
	rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{
		{Path: path.Join(remoteRoot, "other-file"), ContentSHA256Hex: "x"},
	}}, remoteRoot)

	in := diff{put: []string{"my-script.py"}}
	got := rf.Apply(t.Context(), in, files, ltr)
	assert.Equal(t, []string{"my-script.py"}, got.put)
}

func TestRemoteFilterDropsPutForNotebookWhenSHAMatches(t *testing.T) {
	// Notebooks are stored without their .py extension on the workspace —
	// LocalToRemoteNames maps "my-nb.py" -> "my-nb". The filter must use the
	// remote name when looking up the SHA.
	files, ltr := loadTestFiles(t)
	root := vfs.MustNew("./testdata/sync-fileset")
	wantSHA := sha256OfFile(t, root, "my-nb.py")
	require.Equal(t, "my-nb", ltr["my-nb.py"])

	rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{
		{Path: path.Join(remoteRoot, "my-nb"), ContentSHA256Hex: wantSHA, ObjectType: "NOTEBOOK"},
	}}, remoteRoot)

	in := diff{put: []string{"my-nb.py"}}
	got := rf.Apply(t.Context(), in, files, ltr)
	assert.Empty(t, got.put)
}

// One matching file, one stale notebook, one matching notebook — only the
// stale notebook should survive the filter; deletes/mkdirs pass through
// untouched.
func TestRemoteFilterMixedDiff(t *testing.T) {
	files, ltr := loadTestFiles(t)
	root := vfs.MustNew("./testdata/sync-fileset")
	scriptSHA := sha256OfFile(t, root, "my-script.py")
	validNbSHA := sha256OfFile(t, root, "valid-nb.ipynb")

	rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{
		{Path: path.Join(remoteRoot, "my-script.py"), ContentSHA256Hex: scriptSHA, ObjectType: "FILE"},
		{Path: path.Join(remoteRoot, "my-nb"), ContentSHA256Hex: "stale", ObjectType: "NOTEBOOK"},
		{Path: path.Join(remoteRoot, "valid-nb"), ContentSHA256Hex: validNbSHA, ObjectType: "NOTEBOOK"},
	}}, remoteRoot)

	in := diff{
		put:    []string{"my-script.py", "my-nb.py", "valid-nb.ipynb"},
		delete: []string{"gone"},
		mkdir:  []string{"new"},
	}
	got := rf.Apply(t.Context(), in, files, ltr)
	assert.ElementsMatch(t, []string{"my-nb.py"}, got.put, "only the SHA-mismatched notebook should be re-uploaded")
	assert.Equal(t, []string{"gone"}, got.delete)
	assert.Equal(t, []string{"new"}, got.mkdir)
}
Snapshot diff (libs/sync/snapshot.go, libs/sync/diff.go): compares the +// discovered files against a local snapshot of mtimes from the previous +// run and produces a diff (puts, deletes, mkdirs, rmdirs) — the action +// plan for this iteration. If no snapshot exists, every file becomes a +// put. +// +// 3. Remote filter (libs/sync/remote_filter.go): an optional pre-flight that +// fetches content SHAs from the workspace and drops puts whose remote SHA +// already matches the local SHA. We only run it when the snapshot is +// fresh (no prior state), which is the only case where Layer 2 produces +// false-positive puts at scale (e.g. on a CI runner). When a local +// snapshot exists, Layer 2 is already accurate enough; paying for a +// bulk remote list would be wasted work. type Sync struct { *SyncOptions @@ -52,8 +70,9 @@ type Sync struct { includeFileSet *fileset.FileSet excludeFileSet *fileset.FileSet - snapshot *Snapshot - filer filer.Filer + snapshot *Snapshot + filer filer.Filer + remoteFilter *RemoteFilter // Synchronization progress events are sent to this event notifier. notifier EventNotifier @@ -111,11 +130,19 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { } } - filer, err := filer.NewWorkspaceFilesClient(opts.WorkspaceClient, opts.RemotePath) + filerImpl, err := filer.NewWorkspaceFilesClient(opts.WorkspaceClient, opts.RemotePath) if err != nil { return nil, err } + // The remote SHA list call is not part of the Filer interface (it's a + // sync-only optimization, not a general filesystem op), so we type-assert + // the concrete client. In tests we plug in a stub via NewWithRemoteFilter. 
+ var remoteFilter *RemoteFilter + if wfc, ok := filerImpl.(*filer.WorkspaceFilesClient); ok { + remoteFilter = NewRemoteFilter(wfc, opts.RemotePath) + } + var notifier EventNotifier outputWaitGroup := &stdsync.WaitGroup{} if opts.OutputHandler != nil { @@ -135,7 +162,8 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { includeFileSet: includeFileSet, excludeFileSet: excludeFileSet, snapshot: snapshot, - filer: filer, + filer: filerImpl, + remoteFilter: remoteFilter, notifier: notifier, outputWaitGroup: outputWaitGroup, seq: 0, @@ -178,16 +206,26 @@ func (s *Sync) notifyComplete(ctx context.Context, d diff) { // Returns the list of files tracked (and synchronized) by the syncer during the run, // and an error if any occurred. func (s *Sync) RunOnce(ctx context.Context) ([]fileset.File, error) { + // Layer 1: discovery. files, err := s.GetFileList(ctx) if err != nil { return files, err } + // Layer 2: snapshot-driven action plan. change, err := s.snapshot.diff(ctx, files) if err != nil { return files, err } + // Layer 3: remote-state filter, only when the snapshot is fresh. + // With an existing snapshot, Layer 2 is precise; with no snapshot, every + // file is a put — so we ask the workspace what's already there and drop + // puts whose contents already match. + if s.snapshot.New && s.remoteFilter != nil { + change = s.remoteFilter.Apply(ctx, change, files, s.snapshot.LocalToRemoteNames) + } + s.notifyStart(ctx, change) if change.IsEmpty() { s.notifyComplete(ctx, change) diff --git a/libs/sync/walkers_bench_test.go b/libs/sync/walkers_bench_test.go new file mode 100644 index 00000000000..d6103587117 --- /dev/null +++ b/libs/sync/walkers_bench_test.go @@ -0,0 +1,166 @@ +//go:build benchworkspace + +// BenchmarkWalkers — minimal head-to-head comparison of /workspace/list-repo +// (recursive, server-side) vs a client-side parallel walker over the +// non-recursive /workspace/list endpoint. 
+// +// Always 100 files, distributed round-robin across the available leaves of +// each tree shape. Total file count is constant; only depth/breadth changes: +// +// leaves=1 → 100 files in one dir (flat) +// leaves=4 → ~25 files per dir, depth 2 +// leaves=16 → ~6-7 files per dir, depth 4 +// leaves=64 → ~1-2 files per dir, depth 6 +// +// The scaffold lives at a hardcoded persistent path: +// +// /Users/$DATABRICKS_BENCH_USER/.tmp/sync-bench-walkers-fixture/ +// +// First run populates it (~one-time cost). Subsequent runs reuse it, so +// repeated bench iterations are fast. To force a fresh scaffold, delete the +// fixture dir from the workspace. +// +// Run with 3 iterations per cell: +// +// export DATABRICKS_BENCH_PROFILE= +// export DATABRICKS_BENCH_USER= +// go test -tags benchworkspace -bench=BenchmarkWalkers$ -benchtime=3x -timeout=15m ./libs/sync/ + +package sync + +import ( + "context" + "fmt" + "os" + "path" + "testing" + + "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go" + apiclient "github.com/databricks/databricks-sdk-go/client" + "github.com/databricks/databricks-sdk-go/config" +) + +const ( + walkersFilesPerCell = 100 + walkersFixtureName = "sync-bench-walkers-fixture" +) + +func BenchmarkWalkers(b *testing.B) { + profile := os.Getenv("DATABRICKS_BENCH_PROFILE") + user := os.Getenv("DATABRICKS_BENCH_USER") + if profile == "" || user == "" { + b.Skip("DATABRICKS_BENCH_PROFILE and DATABRICKS_BENCH_USER must be set") + } + wc, err := databricks.NewWorkspaceClient(&databricks.Config{Profile: profile}) + if err != nil { + b.Fatalf("workspace client: %v", err) + } + apiC, err := apiclient.New(&config.Config{Profile: profile}) + if err != nil { + b.Fatalf("api client: %v", err) + } + + fixtureRoot := fmt.Sprintf("/Users/%s/.tmp/%s", user, walkersFixtureName) + if err := wc.Workspace.MkdirsByPath(context.Background(), fixtureRoot); err != nil { + b.Fatalf("mkdir fixture root: %v", err) + } + env := &benchEnv{wc: wc, 
apiClient: apiC, username: user, root: fixtureRoot} + + wfc, err := filer.NewWorkspaceFilesClient(wc, fixtureRoot) + if err != nil { + b.Fatalf("filer: %v", err) + } + lister := wfc.(*filer.WorkspaceFilesClient) + + cases := []struct { + name string + shape treeShape + }{ + {"leaves=1", treeShape{"flat", 0, 0}}, + {"leaves=4", treeShape{"small", 2, 2}}, + {"leaves=16", treeShape{"medium", 4, 2}}, + {"leaves=64", treeShape{"large", 6, 2}}, + } + + for _, c := range cases { + dir := path.Join(fixtureRoot, c.name) + items := walkersItemsRoundRobin(c.shape, walkersFilesPerCell) + ensureWalkersFixture(b, env, lister, dir, items) + + b.Run(c.name+"/list-repo", func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list-repo: %v", err) + } + } + }) + b.Run(c.name+"/parallel-walk", func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := parallelWalk(context.Background(), apiC, dir, 8); err != nil { + b.Fatalf("parallel-walk: %v", err) + } + } + }) + } +} + +// walkersItemsRoundRobin generates n file paths distributed round-robin +// across the leaves of the given shape (file 0 → leaf 0, file 1 → leaf 1, …, +// rolling over once every leaf has one file). Returns a (relPath → body) +// map suitable for passing to populate(). +func walkersItemsRoundRobin(shape treeShape, n int) map[string][]byte { + gen := generators()["file"] + leaves := walkersLeafPaths(shape) + out := make(map[string][]byte, n) + for i := 0; i < n; i++ { + leaf := leaves[i%len(leaves)] + suf, body := gen(i) + rel := path.Join(leaf, fmt.Sprintf("f-%d%s", i, suf)) + out[rel] = body + } + return out +} + +// walkersLeafPaths returns every leaf-dir path for the given shape, in DFS +// order. For shape=flat returns [""], so files land directly under the cell +// root. 
// walkersLeafPaths returns every leaf-dir path for the given shape, in DFS
// order. For shape=flat returns [""], so files land directly under the cell
// root.
func walkersLeafPaths(shape treeShape) []string {
	if shape.depth == 0 {
		return []string{""}
	}
	var paths []string
	var walk func(prefix string, d int)
	walk = func(prefix string, d int) {
		// d counts remaining levels; at zero, prefix names a leaf directory.
		if d == 0 {
			paths = append(paths, prefix)
			return
		}
		for b := 0; b < shape.branch; b++ {
			walk(path.Join(prefix, fmt.Sprintf("d%d", b)), d-1)
		}
	}
	walk("", shape.depth)
	return paths
}

// ensureWalkersFixture populates dir with items only if the existing fixture
// doesn't already match. Existence check is by file count under dir (using
// list-repo). Lets repeat bench runs reuse the scaffolding.
func ensureWalkersFixture(b *testing.B, env *benchEnv, lister *filer.WorkspaceFilesClient, dir string, items map[string][]byte) {
	existing, err := lister.ListWithSHAs(context.Background(), dir)
	if err == nil {
		// Count only entries carrying a content SHA — i.e. files/notebooks,
		// not directories.
		fileCount := 0
		for _, o := range existing {
			if o.ContentSHA256Hex != "" {
				fileCount++
			}
		}
		if fileCount == len(items) {
			b.Logf("fixture %s reused (%d files)", dir, fileCount)
			return
		}
	}
	// Any list error (e.g. fixture absent) falls through to a rebuild.
	b.Logf("scaffolding fixture %s (%d files)", dir, len(items))
	populate(b, env, dir, items)
}
// ----- shape: tree depth and branching used to spread files across dirs ----

// treeShape describes a benchmark fixture layout: a full branch-ary tree of
// directories, depth levels deep, identified by name in benchmark output.
type treeShape struct {
	name   string
	depth  int
	branch int // unused when depth == 0 (flat)
}

// shapes ranges from "flat" (no nesting) to "large" (deep tree, many dirs).
// Tweak these values if you want to explore broader/narrower mixes.
var shapes = []treeShape{
	{"flat", 0, 0},   // 1 leaf dir
	{"small", 2, 2},  // 4 leaf dirs
	{"medium", 4, 2}, // 16 leaf dirs
	{"large", 6, 2},  // 64 leaf dirs
}
+func generatePaths(shape treeShape, n int) []string { + if shape.depth == 0 { + out := make([]string, n) + for i := 0; i < n; i++ { + out[i] = fmt.Sprintf("f-%05d", i) + } + return out + } + leaves := 1 + for i := 0; i < shape.depth; i++ { + leaves *= shape.branch + } + filesPerLeaf := (n + leaves - 1) / leaves + if filesPerLeaf < 1 { + filesPerLeaf = 1 + } + var paths []string + var walk func(prefix string, d int) + walk = func(prefix string, d int) { + if d == 0 { + for f := 0; f < filesPerLeaf && len(paths) < n; f++ { + paths = append(paths, path.Join(prefix, fmt.Sprintf("f-%d", f))) + } + return + } + for b := 0; b < shape.branch; b++ { + if len(paths) >= n { + return + } + walk(path.Join(prefix, fmt.Sprintf("d%d", b)), d-1) + } + } + walk("", shape.depth) + return paths +} + +// ----- environment + helpers ------------------------------------------------ + +type benchEnv struct { + wc *databricks.WorkspaceClient + apiClient *apiclient.DatabricksClient + username string + root string // absolute workspace path, no trailing slash +} + +func newBenchEnv(tb testing.TB) *benchEnv { + tb.Helper() + profile := os.Getenv("DATABRICKS_BENCH_PROFILE") + user := os.Getenv("DATABRICKS_BENCH_USER") + if profile == "" || user == "" { + tb.Skip("DATABRICKS_BENCH_PROFILE and DATABRICKS_BENCH_USER must be set") + } + wc, err := databricks.NewWorkspaceClient(&databricks.Config{Profile: profile}) + if err != nil { + tb.Fatalf("workspace client: %v", err) + } + c, err := apiclient.New(&config.Config{Profile: profile}) + if err != nil { + tb.Fatalf("api client: %v", err) + } + tag := make([]byte, 4) + _, _ = rand.Read(tag) + root := fmt.Sprintf("/Users/%s/.tmp/sync-bench-%s", user, hex.EncodeToString(tag)) + if err := wc.Workspace.MkdirsByPath(context.Background(), root); err != nil { + tb.Fatalf("mkdir root: %v", err) + } + tb.Cleanup(func() { + _ = wc.Workspace.Delete(context.Background(), workspace.Delete{Path: root, Recursive: true}) + }) + return &benchEnv{wc: wc, apiClient: 
// importRaw uploads body bytes via the legacy import-file endpoint. Used for
// setup only — what we benchmark is the listing / sync path, not the upload.
func importRaw(ctx context.Context, c *apiclient.DatabricksClient, absPath string, body []byte) error {
	// The workspace path is embedded in the URL (escaped, leading slash
	// stripped); overwrite=true makes repeated populates idempotent.
	urlPath := fmt.Sprintf("/api/2.0/workspace-files/import-file/%s?overwrite=true",
		url.PathEscape(strings.TrimLeft(absPath, "/")))
	return c.Do(ctx, http.MethodPost, urlPath, nil, nil, body, nil)
}

// populate creates all parent dirs under remoteDir, then uploads (relPath →
// body) entries via a 16-worker pool. Setup helper for benchmarks.
//
// NOTE(review): uses sync.WaitGroup.Go, which requires a Go 1.25 toolchain —
// confirm go.mod before relying on this building everywhere.
func populate(tb testing.TB, env *benchEnv, remoteDir string, items map[string][]byte) {
	tb.Helper()
	ctx := context.Background()
	if err := env.wc.Workspace.MkdirsByPath(ctx, remoteDir); err != nil {
		tb.Fatalf("mkdir %s: %v", remoteDir, err)
	}
	// Collect the set of distinct parent directories needed by the items.
	parents := map[string]struct{}{}
	for rel := range items {
		if d := path.Dir(rel); d != "." && d != "/" {
			parents[d] = struct{}{}
		}
	}
	parentList := make([]string, 0, len(parents))
	for d := range parents {
		parentList = append(parentList, d)
	}
	// Sorted for deterministic ordering (map iteration order is random).
	sort.Strings(parentList)

	// Parallelize the mkdirs — for deep shapes the serial loop dominates
	// fixture setup time.
	mkdirJobs := make(chan string, len(parentList))
	for _, d := range parentList {
		mkdirJobs <- d
	}
	close(mkdirJobs)
	var mkdirWG stdsync.WaitGroup
	// Record only the first mkdir error; later ones lose the CAS race.
	var mkdirErr atomic.Pointer[error]
	for i := 0; i < 16; i++ {
		mkdirWG.Go(func() {
			for d := range mkdirJobs {
				if err := env.wc.Workspace.MkdirsByPath(ctx, path.Join(remoteDir, d)); err != nil {
					e := fmt.Errorf("mkdir %s: %w", d, err)
					mkdirErr.CompareAndSwap(nil, &e)
				}
			}
		})
	}
	mkdirWG.Wait()
	if e := mkdirErr.Load(); e != nil {
		tb.Fatalf("%v", *e)
	}
	type job struct {
		rel  string
		body []byte
	}
	jobs := make(chan job, len(items))
	for r, b := range items {
		jobs <- job{r, b}
	}
	close(jobs)
	var wg stdsync.WaitGroup
	// Upload failures are counted, not fatal: a benchmark fixture with a few
	// missing files is still usable, and the warning below surfaces it.
	var failed atomic.Int64
	for i := 0; i < 16; i++ {
		wg.Go(func() {
			for j := range jobs {
				if err := importRaw(ctx, env.apiClient, path.Join(remoteDir, j.rel), j.body); err != nil {
					failed.Add(1)
				}
			}
		})
	}
	wg.Wait()
	if f := failed.Load(); f > 0 {
		tb.Logf("warning: %d uploads failed during populate", f)
	}
}
+func generators() map[string]func(i int) (suffix string, body []byte) { + pad := func(s string, n int) []byte { + if n <= len(s) { + return []byte(s[:n]) + } + buf := make([]byte, n) + copy(buf, s) + for i := len(s); i < n; i++ { + buf[i] = byte('a' + (i % 26)) + } + return buf + } + return map[string]func(i int) (string, []byte){ + "file": func(i int) (string, []byte) { + return ".txt", pad(fmt.Sprintf("plain file %d\n", i), 200) + }, + "py-notebook": func(i int) (string, []byte) { + return ".py", pad(fmt.Sprintf("# Databricks notebook source\nprint('%d')\n", i), 200) + }, + "sql-notebook": func(i int) (string, []byte) { + return ".sql", pad(fmt.Sprintf("-- Databricks notebook source\nSELECT %d;\n", i), 200) + }, + "ipynb": func(i int) (string, []byte) { + return ".ipynb", pad(fmt.Sprintf(`{"cells":[{"cell_type":"code","source":["# Databricks notebook source\n","print(%d)"],"outputs":[],"execution_count":null,"metadata":{}}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"}},"nbformat":4,"nbformat_minor":4}`, i), 200) + }, + "dashboard": func(i int) (string, []byte) { + return ".lvdash.json", pad(`{"datasets":[],"pages":[{"name":"p","displayName":"P","layout":[]}]}`, 200) + }, + } +} + +// itemsForCell builds the (relPath → body) map for a (shape, n, kindKey) cell. +func itemsForCell(shape treeShape, n int, kindKey string) map[string][]byte { + gen := generators()[kindKey] + paths := generatePaths(shape, n) + out := make(map[string][]byte, len(paths)) + for i, rel := range paths { + suf, body := gen(i) + out[rel+suf] = body + } + return out +} + +// itemsMixed builds a map where each file is a different kind in a fixed +// rotation. Used by the content-mix benchmark. 
+func itemsMixed(shape treeShape, n int) map[string][]byte { + gens := generators() + kinds := []string{"file", "py-notebook", "ipynb", "dashboard", "sql-notebook"} + paths := generatePaths(shape, n) + out := make(map[string][]byte, len(paths)) + for i, rel := range paths { + suf, body := gens[kinds[i%len(kinds)]](i) + out[rel+suf] = body + } + return out +} + +// ----- parallel-walk runner (test-only alternative to list-repo) ------------ + +// parallelWalk lists everything under root by issuing non-recursive +// /api/2.0/workspace/list calls and recursing into directories from the +// client side, with up to `workers` outstanding calls in flight. It serves as +// a baseline to compare against the recursive list-repo endpoint. +// +// Test-only: we do not ship this in production. The list-repo path is +// strictly faster for any nested tree (see benchmark numbers). +func parallelWalk(ctx context.Context, c *apiclient.DatabricksClient, root string, workers int) ([]filer.RemoteFileMetadata, error) { + type listObject struct { + ObjectType string `json:"object_type"` + Path string `json:"path"` + ContentSHA256Hex string `json:"content_sha256_hex"` + HasWsfsMetadata bool `json:"has_wsfs_metadata"` + } + type listResp struct { + Objects []listObject `json:"objects"` + } + + sem := make(chan struct{}, workers) + var ( + mu stdsync.Mutex + results []filer.RemoteFileMetadata + firstErr error + wg stdsync.WaitGroup + ) + + var walk func(dir string) + walk = func(dir string) { + defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() + + var resp listResp + body := map[string]any{"path": dir, "return_wsfs_metadata": true} + if err := c.Do(ctx, http.MethodGet, "/api/2.0/workspace/list", nil, nil, body, &resp); err != nil { + mu.Lock() + if firstErr == nil { + firstErr = err + } + mu.Unlock() + return + } + for _, o := range resp.Objects { + if o.ObjectType == "DIRECTORY" { + wg.Add(1) + go walk(o.Path) + continue + } + if o.ContentSHA256Hex == "" { + continue + } + 
mu.Lock() + results = append(results, filer.RemoteFileMetadata{ + Path: o.Path, + ContentSHA256Hex: o.ContentSHA256Hex, + ObjectType: o.ObjectType, + }) + mu.Unlock() + } + } + wg.Add(1) + go walk(root) + wg.Wait() + return results, firstErr +} + +// ----- benchmarks ---------------------------------------------------------- + +// BenchmarkListRepo measures /workspace/list-repo cost vs (shape, N) at fixed +// content (plain files). +func BenchmarkListRepo(b *testing.B) { + env := newBenchEnv(b) + wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root) + if err != nil { + b.Fatalf("filer: %v", err) + } + lister := wfc.(*filer.WorkspaceFilesClient) + + counts := []int{10, 50} + for _, shape := range shapes { + for _, n := range counts { + b.Run(fmt.Sprintf("shape=%s/N=%d", shape.name, n), func(b *testing.B) { + dir := path.Join(env.root, fmt.Sprintf("listrepo-%s-%d", shape.name, n)) + populate(b, env, dir, itemsForCell(shape, n, "file")) + _, _ = lister.ListWithSHAs(context.Background(), dir) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list: %v", err) + } + } + }) + } + } +} + +// BenchmarkListRepoByContent measures /workspace/list-repo cost across +// content mixes, at every shape, fixed N=200. 
+func BenchmarkListRepoByContent(b *testing.B) { + const N = 200 + env := newBenchEnv(b) + wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root) + if err != nil { + b.Fatalf("filer: %v", err) + } + lister := wfc.(*filer.WorkspaceFilesClient) + + contents := []string{"file", "py-notebook", "ipynb", "sql-notebook", "dashboard"} + for _, shape := range shapes { + for _, kind := range contents { + b.Run(fmt.Sprintf("shape=%s/content=%s", shape.name, kind), func(b *testing.B) { + dir := path.Join(env.root, fmt.Sprintf("content-%s-%s", shape.name, kind)) + populate(b, env, dir, itemsForCell(shape, N, kind)) + _, _ = lister.ListWithSHAs(context.Background(), dir) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list: %v", err) + } + } + }) + } + b.Run(fmt.Sprintf("shape=%s/content=mixed", shape.name), func(b *testing.B) { + dir := path.Join(env.root, fmt.Sprintf("content-%s-mixed", shape.name)) + populate(b, env, dir, itemsMixed(shape, N)) + _, _ = lister.ListWithSHAs(context.Background(), dir) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list: %v", err) + } + } + }) + } +} + +// BenchmarkListWalkers compares /workspace/list-repo (recursive, server-side) +// against the test-only parallel client-side walk, across (shape, N, workers). +// Plain-file content; the goal is to characterize the listing strategies +// themselves. 
+func BenchmarkListWalkers(b *testing.B) { + env := newBenchEnv(b) + wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root) + if err != nil { + b.Fatalf("filer: %v", err) + } + lister := wfc.(*filer.WorkspaceFilesClient) + + counts := []int{100, 500} + workerCounts := []int{8, 32} + for _, shape := range shapes { + for _, n := range counts { + dir := path.Join(env.root, fmt.Sprintf("walkers-%s-%d", shape.name, n)) + populate(b, env, dir, itemsForCell(shape, n, "file")) + _, _ = lister.ListWithSHAs(context.Background(), dir) + + b.Run(fmt.Sprintf("shape=%s/N=%d/impl=list-repo", shape.name, n), func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list-repo: %v", err) + } + } + }) + for _, w := range workerCounts { + w := w + b.Run(fmt.Sprintf("shape=%s/N=%d/impl=parallel-w%d", shape.name, n, w), func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := parallelWalk(context.Background(), env.apiClient, dir, w); err != nil { + b.Fatalf("parallel-walk: %v", err) + } + } + }) + } + } + } +} + +// BenchmarkSyncRunOnceColdSnapshot times an end-to-end Sync.RunOnce against a +// pre-warmed remote (the CI-cold-runner case). Sub-benchmarks compare with- +// and without-Layer-3 across (shape, N) cells. Plain-file content. 
+func BenchmarkSyncRunOnceColdSnapshot(b *testing.B) { + env := newBenchEnv(b) + user, err := env.wc.CurrentUser.Me(context.Background()) + if err != nil { + b.Fatalf("Me: %v", err) + } + + counts := []int{20, 100} + for _, shape := range shapes { + for _, n := range counts { + b.Run(fmt.Sprintf("shape=%s/N=%d", shape.name, n), func(b *testing.B) { + localDir := b.TempDir() + gen := generators()["file"] + for i, rel := range generatePaths(shape, n) { + suf, body := gen(i) + p := filepath.Join(localDir, filepath.FromSlash(rel)+suf) + if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil { + b.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(p, body, 0o644); err != nil { + b.Fatalf("write %s: %v", p, err) + } + } + remoteDir := path.Join(env.root, fmt.Sprintf("sync-%s-%d", shape.name, n)) + if err := env.wc.Workspace.MkdirsByPath(context.Background(), remoteDir); err != nil { + b.Fatalf("mkdir: %v", err) + } + + runOnce := func(b *testing.B, withLayer3 bool) { + _ = os.RemoveAll(filepath.Join(localDir, ".databricks")) + localRoot := vfs.MustNew(localDir) + snapBase := b.TempDir() + s, err := New(context.Background(), SyncOptions{ + WorktreeRoot: localRoot, + LocalRoot: localRoot, + Paths: []string{"."}, + RemotePath: remoteDir, + SnapshotBasePath: snapBase, + WorkspaceClient: env.wc, + CurrentUser: user, + }) + if err != nil { + b.Fatalf("Sync.New: %v", err) + } + if !withLayer3 { + s.remoteFilter = nil + } + if _, err := s.RunOnce(context.Background()); err != nil { + b.Fatalf("RunOnce: %v", err) + } + s.Close() + } + + // Pre-warm so the workspace already has every file. 
+ runOnce(b, true) + b.ResetTimer() + + b.Run("with-layer3", func(b *testing.B) { + for i := 0; i < b.N; i++ { + runOnce(b, true) + } + }) + b.Run("without-layer3", func(b *testing.B) { + for i := 0; i < b.N; i++ { + runOnce(b, false) + } + }) + }) + } + } +} diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index 5430c68cbcc..36596b58252 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -2,7 +2,9 @@ package testserver import ( "bytes" + "crypto/sha256" "encoding/binary" + "encoding/hex" "encoding/json" "fmt" "os" @@ -454,6 +456,56 @@ func (s *FakeWorkspace) WorkspaceFilesExportFile(path string) []byte { return s.files[path].Data } +// WorkspaceListRepo returns the recursive listing under root used by the +// /api/2.0/workspace/list-repo endpoint. The CLI calls this with +// return_wsfs_metadata=true, expecting a content_sha256_hex per file/notebook. +func (s *FakeWorkspace) WorkspaceListRepo(root string) any { + if !strings.HasPrefix(root, "/") { + root = "/" + root + } + + defer s.LockUnlock()() + + type listObject struct { + ObjectType string `json:"object_type"` + Path string `json:"path"` + ContentSHA256Hex string `json:"content_sha256_hex,omitempty"` + HasWsfsMetadata bool `json:"has_wsfs_metadata,omitempty"` + Size int `json:"size,omitempty"` + Language string `json:"language,omitempty"` + } + + var objects []listObject + if _, ok := s.directories[root]; ok { + objects = append(objects, listObject{ObjectType: "DIRECTORY", Path: root}) + } + for p, dir := range s.directories { + if p == root || !strings.HasPrefix(p, root+"/") { + continue + } + objects = append(objects, listObject{ObjectType: "DIRECTORY", Path: dir.Path}) + } + for p, fe := range s.files { + if !strings.HasPrefix(p, root+"/") { + continue + } + sum := sha256.Sum256(fe.Data) + obj := listObject{ + ObjectType: string(fe.Info.ObjectType), + Path: fe.Info.Path, + ContentSHA256Hex: hex.EncodeToString(sum[:]), + 
HasWsfsMetadata: true, + Size: len(fe.Data), + } + if fe.Info.Language != "" { + obj.Language = string(fe.Info.Language) + } + objects = append(objects, obj) + } + + return map[string]any{"objects": objects} +} + // FileExists checks if a file exists at the given path. func (s *FakeWorkspace) FileExists(path string) bool { if !strings.HasPrefix(path, "/") { diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 8bd53391841..1ebab18051f 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -79,6 +79,11 @@ func AddDefaultHandlers(server *Server) { return req.Workspace.WorkspaceGetStatus(path) }) + server.Handle("GET", "/api/2.0/workspace/list-repo", func(req Request) any { + path := req.URL.Query().Get("path") + return req.Workspace.WorkspaceListRepo(path) + }) + server.Handle("POST", "/api/2.0/workspace/mkdirs", func(req Request) any { var request workspace.Mkdirs if err := json.Unmarshal(req.Body, &request); err != nil {