diff --git a/CHANGELOG.md b/CHANGELOG.md
index a76a10d28..41877a5bf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added endpoint for searching commit history for a git repository. [#625](https://github.com/sourcebot-dev/sourcebot/pull/625)
- Added `pushedAt` field to the Repo table to track when a repository last was committed to across all branches. [#790](https://github.com/sourcebot-dev/sourcebot/pull/790)
+### Changed
+- Added commit graph generation to improve performance for commit traversal operations. [#791](https://github.com/sourcebot-dev/sourcebot/pull/791)
+
## [4.10.17] - 2026-01-23
### Fixed
diff --git a/docs/docs/connections/local-repos.mdx b/docs/docs/connections/local-repos.mdx
index 114c3c8cc..62b76c692 100644
--- a/docs/docs/connections/local-repos.mdx
+++ b/docs/docs/connections/local-repos.mdx
@@ -55,7 +55,6 @@ To get Sourcebot to index these repositories:
## Examples
-
```json
@@ -76,6 +75,22 @@ To get Sourcebot to index these repositories:
+## Optimizing git operations
+
+Sourcebot performs a number of operations that require traversing a repository's entire commit history (e.g., `git rev-list --count HEAD`). These operations can be slow in repositories with a large number of commits.
+
+Typically, Sourcebot generates a [commit graph](https://git-scm.com/docs/commit-graph) to speed up these operations (see [#791](https://github.com/sourcebot-dev/sourcebot/pull/791)). However, since local repositories are treated as read-only, Sourcebot **will not** generate a commit graph for them.
+
+A commit graph can be manually generated by running the following command in the repository's root directory:
+```sh
+git commit-graph write --reachable
+```
+
+The commit graph can be updated when fetching with `--write-commit-graph`:
+```sh
+git fetch --write-commit-graph
+```
+
## Schema reference
diff --git a/packages/backend/src/git.ts b/packages/backend/src/git.ts
index 4fa43ac8a..27160a3e5 100644
--- a/packages/backend/src/git.ts
+++ b/packages/backend/src/git.ts
@@ -82,6 +82,12 @@ export const cloneRepository = async (
keys: ["remote.origin.url"],
signal,
});
+
+ // @note: operations that need to iterate over a lot of commits (e.g., rev-list --count)
+ // can be slow on larger repositories. Commit graphs are an acceleration structure that
+ // speeds up these operations.
+ // @see: https://git-scm.com/docs/commit-graph
+ await writeCommitGraph({ path, signal });
} catch (error: unknown) {
const baseLog = `Failed to clone repository: ${path}`;
@@ -121,7 +127,10 @@ export const fetchRepository = async (
cloneUrl,
"+refs/heads/*:refs/heads/*",
"--prune",
- "--progress"
+ "--progress",
+ // On fetch, ensure the commit graph is up to date.
+ // @see: https://git-scm.com/docs/commit-graph
+ "--write-commit-graph"
]);
// Update HEAD to match the remote's default branch. This handles the case where the remote's
@@ -405,3 +414,27 @@ export const getLatestCommitTimestamp = async ({
return undefined;
}
}
+
+/**
+ * Writes or updates the commit-graph file for the repository at `path` by running
+ * `git commit-graph write --reachable`. This pre-computes commit metadata to speed up
+ * operations like rev-list --count, log, and merge-base. A failure of the git command
+ * is logged at debug level and is not rethrown.
+ */
+export const writeCommitGraph = async ({
+    path,
+    onProgress,
+    signal,
+}: {
+    path: string,
+    onProgress?: onProgressFn,
+    signal?: AbortSignal,
+}): Promise<void> => {
+    const git = createGitClientForPath(path, onProgress, signal);
+    try {
+        await git.raw(['commit-graph', 'write', '--reachable']);
+    } catch (error: unknown) {
+        // Don't throw an exception here since this is just a performance optimization.
+        logger.debug(`Failed to write commit-graph for ${path}:`, error);
+    }
+}