diff --git a/CHANGELOG.md b/CHANGELOG.md index a76a10d28..41877a5bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added endpoint for searching commit history for a git repository. [#625](https://github.com/sourcebot-dev/sourcebot/pull/625) - Added `pushedAt` field to the Repo table to track when a repository last was committed to across all branches. [#790](https://github.com/sourcebot-dev/sourcebot/pull/790) +### Changed +- Added commit graph generation to improve performance for commit traversal operations. [#791](https://github.com/sourcebot-dev/sourcebot/pull/791) + ## [4.10.17] - 2026-01-23 ### Fixed diff --git a/docs/docs/connections/local-repos.mdx b/docs/docs/connections/local-repos.mdx index 114c3c8cc..62b76c692 100644 --- a/docs/docs/connections/local-repos.mdx +++ b/docs/docs/connections/local-repos.mdx @@ -55,7 +55,6 @@ To get Sourcebot to index these repositories: ## Examples - ```json @@ -76,6 +75,22 @@ To get Sourcebot to index these repositories: +## Optimizing git operations + +Sourcebot performs a number of operations that require traversing a repository's entire commit history (e.g., `git rev-list --count HEAD`). These operations can be slow in repositories with a large number of commits. + +Typically, a [commit graph](https://git-scm.com/docs/commit-graph) is generated to speed up these operations (see [#791](https://github.com/sourcebot-dev/sourcebot/pull/791)). However, since local repositories are treated as read-only, Sourcebot **will not** generate a commit graph for them. 
+ +A commit graph can be manually generated by running the following command in the repository's root directory: +```sh +git commit-graph write --reachable +``` + +The commit graph can be updated when fetching with `--write-commit-graph`: +```sh +git fetch --write-commit-graph +``` + ## Schema reference diff --git a/packages/backend/src/git.ts b/packages/backend/src/git.ts index 4fa43ac8a..27160a3e5 100644 --- a/packages/backend/src/git.ts +++ b/packages/backend/src/git.ts @@ -82,6 +82,12 @@ export const cloneRepository = async ( keys: ["remote.origin.url"], signal, }); + + // @note: operations that need to iterate over a lot of commits (e.g., rev-list --count) + // can be slow on larger repositories. Commit graphs are an acceleration structure that + // speeds up these operations. + // @see: https://git-scm.com/docs/commit-graph + await writeCommitGraph({ path, signal }); } catch (error: unknown) { const baseLog = `Failed to clone repository: ${path}`; @@ -121,7 +127,10 @@ export const fetchRepository = async ( cloneUrl, "+refs/heads/*:refs/heads/*", "--prune", - "--progress" + "--progress", + // On fetch, ensure the commit graph is up to date. + // @see: https://git-scm.com/docs/commit-graph + "--write-commit-graph" ]); // Update HEAD to match the remote's default branch. This handles the case where the remote's @@ -405,3 +414,27 @@ export const getLatestCommitTimestamp = async ({ return undefined; } } + +/** + * Writes or updates the commit-graph file for the repository. + * This pre-computes commit metadata to speed up operations like + * rev-list --count, log, and merge-base. 
+ *
+ * Failures are caught and logged at debug level rather than thrown, so
+ * callers can treat this as a best-effort step.
+ *
+ * @param path - Repository path handed to `createGitClientForPath`.
+ * @param onProgress - Optional progress callback forwarded to the git client.
+ * @param signal - Optional abort signal forwarded to the git client.
+ */
+export const writeCommitGraph = async ({
+    path,
+    onProgress,
+    signal,
+}: {
+    path: string,
+    onProgress?: onProgressFn,
+    signal?: AbortSignal,
+}): Promise<void> => {
+    const git = createGitClientForPath(path, onProgress, signal);
+
+    try {
+        await git.raw(['commit-graph', 'write', '--reachable']);
+    } catch (error: unknown) {
+        // Don't throw an exception here since this is just a performance optimization.
+        logger.debug(`Failed to write commit-graph for ${path}:`, error);
+    }
+}