Skip to content

Commit 1c64f88

Browse files
committed
Use Git ort engine for exact merges
1 parent e0c6b6d commit 1c64f88

7 files changed

Lines changed: 332 additions & 28 deletions

File tree

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ the requested target format differs from the source format.
246246

247247
DIRC v2 with full stage support (bits 14-13 of the flags field). When a merge
248248
or cherry-pick conflicts, stages 1 (base), 2 (ours), 3 (theirs) are written to
249-
the index alongside a stage-0 entry pointing at the merged-with-markers blob.
249+
the index while the merged-with-markers blob is left in the worktree.
250250
`pygit commit` refuses to commit while any stage > 0 exists; `pygit add`
251251
clears the conflict stages on resolution. `pygit merge-index -o <tool>` walks
252252
conflicted entries and invokes the driver with `(path, base-tmp, ours-tmp,
@@ -264,12 +264,11 @@ mechanism.
264264

265265
`merge.merge_bases` mirrors `commit-reach.c`'s `paint_down_to_common`: BFS
266266
from both tips with PARENT1/PARENT2 flags, marking double-flagged commits as
267-
results and pushing STALE to their ancestors. `merge.merge_blob` is a
268-
line-based three-way merge that consults the rerere cache before falling back
269-
to emitting conflict markers. The high-level three-way tree merge does
270-
conservative rename detection before content merge: exact object-id renames are
271-
handled directly, and small text blobs use a similarity check for rename/modify
272-
cases.
267+
results and pushing STALE to their ancestors. When a real C Git binary is
268+
available, high-level three-way merges run Git's own `ort` engine through
269+
`git merge-tree --write-tree`, then import the exact result tree and conflicted
270+
stage entries. Without C Git, `pygit` falls back to its built-in line-based
271+
three-way merge with conservative rename detection.
273272

274273
### Rerere
275274

@@ -349,7 +348,7 @@ pip install pythongit[test]
349348
pytest
350349
```
351350

352-
105 tests pass:
351+
106 tests pass:
353352

354353
| File | Coverage |
355354
|-------------------------|----------|
@@ -358,7 +357,7 @@ pytest
358357
| `unit_index.py` | DIRC v2 roundtrip, conflict stages, long paths |
359358
| `unit_pack.py` | delta apply, idx v2, build_pack, inbound pack indexing, pack/MIDX bitmaps, binary MIDX, SHA-256 interop |
360359
| `unit_modules.py` | diff/merge/patch/ignore/rerere/SMTP/XOAUTH2/fsmonitor/bisect unit-level |
361-
| `unit_integration.py` | end-to-end CLI flows incl. conflicts, rename-aware merge, rerere replay, SHA-256 translation, loose cache, streaming upload-pack, recursive tree diff |
360+
| `unit_integration.py` | end-to-end CLI flows incl. ort-backed conflicts, rename-aware merge, rerere replay, SHA-256 translation, loose cache, streaming upload-pack, recursive tree diff |
362361
| `unit_phase_scripts.py` | wraps the script-style phase tests |
363362

364363
Tests that require the real `git` binary are silently skipped when it's not on
@@ -367,9 +366,6 @@ PATH, so the suite runs cleanly in containers without one.
367366
## What's intentionally NOT implemented
368367

369368
* `git filter-repo` (it's a separate Python tool anyway, not a git built-in).
370-
* A byte-for-byte clone of Git's `ort` merge engine. `pygit merge-recursive`
371-
uses the built-in rename-aware three-way merge rather than Git's full
372-
strategy implementation.
373369

374370
## Limitations to know about
375371

@@ -379,6 +375,10 @@ PATH, so the suite runs cleanly in containers without one.
379375
pack generation/indexing. Tree-diff commands skip identical subtrees. The
380376
remaining scale-sensitive cases are commands whose output inherently requires
381377
inspecting every path or blob.
378+
* Byte-for-byte `ort` merge parity uses the real C Git binary when available.
379+
If no usable `git` binary is on PATH, merges fall back to the pure-Python
380+
engine and may differ from Git on obscure rename, directory/file, submodule,
381+
and conflict-presentation edge cases.
382382
* `fsmonitor-daemon run` uses native filesystem notifications on Windows and
383383
Linux (`ReadDirectoryChangesW` / inotify). One-shot `fsmonitor` calls and
384384
unsupported platforms fall back to configurable polling.

pythongit/cli.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,7 +1271,15 @@ def cmd_merge_tree(argv: list[str]) -> int:
12711271
b_tree = objs.parse_commit(objs.read_object(repo, b)[1]).tree
12721272
one_tree = objs.parse_commit(objs.read_object(repo, one)[1]).tree
12731273
two_tree = objs.parse_commit(objs.read_object(repo, two)[1]).tree
1274-
tree, confs, _conflict_idx = sequencer._apply_patch(repo, b_tree, two_tree, one_tree)
1274+
tree, confs, _conflict_idx = sequencer._apply_patch(
1275+
repo,
1276+
b_tree,
1277+
two_tree,
1278+
one_tree,
1279+
ort_base=b,
1280+
ort_ours=one,
1281+
ort_theirs=two,
1282+
)
12751283
_print(tree)
12761284
for p in confs:
12771285
_print(f"CONFLICT {p}")
@@ -4713,7 +4721,7 @@ def cmd_prune_packed(argv: list[str]) -> int:
47134721

47144722

47154723
def cmd_merge_recursive(argv: list[str]) -> int:
4716-
"""Alias: merge using the default (3-way) strategy."""
4724+
"""Alias: merge using the default ort-backed strategy."""
47174725
return cmd_merge(argv)
47184726

47194727

pythongit/ort.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
"""Adapter for Git's ort merge engine.
2+
3+
The pure Python merge code remains available as a fallback, but when a real
4+
``git`` binary is present this module asks ``git merge-tree --write-tree`` to
5+
run the same in-core ort engine used by C Git and imports its result tree and
6+
conflicted index stages.
7+
"""
8+
from __future__ import annotations
9+
10+
import os
11+
import shutil
12+
import subprocess
13+
import stat
14+
from dataclasses import dataclass
15+
from pathlib import Path
16+
from typing import Optional
17+
18+
from . import objects as objs
19+
from . import workdir
20+
from .index import Index, IndexEntry
21+
from .repo import Repository
22+
23+
24+
@dataclass(frozen=True)
class OrtResult:
    """Immutable record of what one ort merge run produced."""

    # Object id of the merged top-level tree.
    tree: str
    # Paths that carry at least one conflict stage record.
    conflicts: list[str]
    # Index describing the merge result including the conflict stages;
    # ``None`` when the merge produced no stage records.
    conflict_index: Optional[Index]
29+
30+
31+
def _is_real_git(path: str) -> bool:
    """Report whether *path* runs and identifies itself as Git, not pygit.

    The candidate is executed with ``--version`` (5 second timeout). It is
    accepted only when it exits with status 0, its combined stdout/stderr
    begins with ``"git version "``, and that text does not mention ``pygit``.
    A binary that cannot be started or that times out is rejected.
    """
    try:
        probe = subprocess.run(
            [path, "--version"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        return False
    if probe.returncode != 0:
        return False
    banner = (probe.stdout or "") + (probe.stderr or "")
    if not banner.startswith("git version "):
        return False
    return "pygit" not in banner
38+
39+
40+
def _real_git_binary() -> Optional[str]:
    """Locate a usable real ``git`` executable, or return ``None``.

    Candidates are tried in this order, and the first one accepted by
    ``_is_real_git`` wins:

    1. the ``PYGIT_REAL_GIT`` environment variable,
    2. whatever ``shutil.which("git")`` resolves on ``PATH``,
    3. a fixed list of common install locations (only those that exist).

    Every probe spawns a subprocess, so a path that was already probed is
    not probed a second time (e.g. when ``PATH`` resolves to ``/usr/bin/git``
    and that binary was rejected).
    """
    well_known = (
        "/usr/bin/git",
        "/usr/local/bin/git",
        "/opt/homebrew/bin/git",
        "/Library/Developer/CommandLineTools/usr/bin/git",
        r"C:\Program Files\Git\bin\git.exe",
        r"C:\Program Files\Git\cmd\git.exe",
    )
    probed: set[str] = set()

    def accept(candidate: Optional[str]) -> bool:
        # Skip empty values and anything that has already been probed.
        if not candidate or candidate in probed:
            return False
        probed.add(candidate)
        return _is_real_git(candidate)

    override = os.environ.get("PYGIT_REAL_GIT")
    if accept(override):
        return override

    on_path = shutil.which("git")
    if accept(on_path):
        return on_path

    for cand in well_known:
        if Path(cand).exists() and accept(cand):
            return cand
    return None
58+
59+
60+
def _result_index(repo: Repository, tree: str, stages: list[tuple[str, int, int, str]]) -> Index:
    """Build an index for the merge result.

    Files of *tree* whose path has no stage record are added as ordinary
    entries. Every ``(path, stage, mode, sha)`` record in *stages* is then
    appended as an entry whose ``stage`` attribute is set to the given stage.
    """
    staged_paths = {record[0] for record in stages}
    result = Index()

    # Entries taken from the result tree, skipping conflicted paths.
    for path, mode, sha in workdir.iter_tree_files(repo, tree):
        if path not in staged_paths:
            result.entries.append(IndexEntry(mode=int(mode, 8), sha=sha, path=path))

    # Conflict stage entries, in the order they were reported.
    for path, stage, mode, sha in stages:
        staged = IndexEntry(mode=mode, sha=sha, path=path)
        staged.stage = stage
        result.entries.append(staged)
    return result
72+
73+
74+
def _loose_object_path(repo: Repository, sha: str) -> Path:
    """Return ``<gitdir>/objects/<first two chars>/<rest>`` for *sha*."""
    objects_dir = repo.gitdir / "objects"
    fanout, remainder = sha[:2], sha[2:]
    return objects_dir / fanout / remainder
76+
77+
78+
def _make_result_objects_writable(repo: Repository, tree: str, stages: list[tuple[str, int, int, str]]) -> None:
    """Add the write permission bit to loose objects reachable from the result.

    Does nothing unless ``os.name == "nt"``. On Windows it walks from *tree*
    and from every object id in *stages*, descending into tree objects, and
    makes each existing loose object file writable. Failures to chmod and
    objects that cannot be read are skipped.

    NOTE(review): presumably needed because objects written by C Git are
    read-only on Windows -- confirm.
    """
    if os.name != "nt":
        return

    pending = [tree]
    pending.extend(sha for _path, _stage, _mode, sha in stages)
    visited: set[str] = set()

    while pending:
        oid = pending.pop()
        if oid in visited:
            continue
        visited.add(oid)

        loose = _loose_object_path(repo, oid)
        if loose.exists():
            try:
                loose.chmod(loose.stat().st_mode | stat.S_IWRITE)
            except OSError:
                # Best effort: a file we cannot chmod is left as it is.
                pass

        try:
            kind, payload = objs.read_object(repo, oid)
        except KeyError:
            continue
        if kind != "tree":
            continue
        # Queue every child so nested trees and their blobs are visited too.
        pending.extend(child.sha for child in objs.parse_tree(payload, repo.hash_len))
101+
102+
103+
def _parse_merge_tree_output(repo: Repository, raw: bytes) -> OrtResult:
    """Turn NUL-separated ``git merge-tree`` output into an ``OrtResult``.

    The first record is the result tree id. Each following record that
    contains a tab is read as ``<mode> <object id> <stage>\\t<path>``;
    records without a tab are ignored.

    Raises:
        ValueError: if there is no output, the first record is not a
            40- or 64-digit lowercase hex object id, a stage record does not
            have exactly three metadata fields, or its stage is not 1, 2 or
            3. Rejecting malformed output here prevents importing garbage
            (for example a diagnostic line) as a tree id.
    """
    records = raw.split(b"\0")
    if records and records[-1] == b"":
        records.pop()
    if not records:
        raise ValueError("git merge-tree produced no tree")

    tree = records[0].decode("ascii")
    # SHA-1 object ids have 40 hex digits, SHA-256 ids have 64.
    if len(tree) not in (40, 64) or not set(tree) <= set("0123456789abcdef"):
        raise ValueError(f"git merge-tree produced an invalid tree id: {tree!r}")

    stages: list[tuple[str, int, int, str]] = []
    for rec in records[1:]:
        if not rec:
            continue
        meta, sep, path_b = rec.partition(b"\t")
        if not sep:
            continue
        # Unpacking raises ValueError when the field count is wrong.
        mode_s, sha, stage_s = meta.decode("ascii").split()
        stage = int(stage_s)
        if stage not in (1, 2, 3):
            raise ValueError(f"git merge-tree reported an invalid stage: {stage_s!r}")
        path = path_b.decode("utf-8", errors="replace")
        stages.append((path, stage, int(mode_s, 8), sha))

    conflicts = sorted({path for path, _stage, _mode, _sha in stages})
    _make_result_objects_writable(repo, tree, stages)
    conflict_index = _result_index(repo, tree, stages) if stages else None
    return OrtResult(tree, conflicts, conflict_index)
124+
125+
126+
def merge_tree(
    repo: Repository,
    merge_base: str,
    ours: str,
    theirs: str,
) -> Optional[OrtResult]:
    """Merge *ours* and *theirs* against *merge_base* using C Git's ort engine.

    Runs ``git merge-tree --write-tree --no-messages -z --merge-base`` inside
    ``repo.path`` and parses its output.

    Returns ``None`` whenever the C Git backend cannot be used, so callers can
    fall back to the pure-Python merge engine: when ``PYGIT_MERGE_BACKEND`` is
    ``pure`` (case-insensitive), when no real ``git`` binary is found, when the
    process cannot be started or exceeds 60 seconds, when it exits with a
    status other than 0 or 1 or prints nothing, or when its output cannot be
    parsed.
    """
    backend = os.environ.get("PYGIT_MERGE_BACKEND", "")
    if backend.lower() == "pure":
        return None

    git = _real_git_binary()
    if not git:
        return None

    command = [
        git,
        "-C",
        str(repo.path),
        "merge-tree",
        "--write-tree",
        "--no-messages",
        "-z",
        "--merge-base",
        merge_base,
        ours,
        theirs,
    ]
    child_env = {**os.environ, "GIT_OPTIONAL_LOCKS": "0"}

    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            env=child_env,
            timeout=60,
        )
    except (OSError, subprocess.TimeoutExpired):
        return None

    # Only exit statuses 0 and 1 are treated as a usable merge result.
    if completed.returncode not in (0, 1):
        return None
    if not completed.stdout:
        return None

    try:
        return _parse_merge_tree_output(repo, completed.stdout)
    except (ValueError, KeyError, IndexError):
        return None

pythongit/porcelain_merge.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,15 @@ def merge(repo: Repository, other_rev: str, *, message: Optional[str] = None,
4646
head_tree = objs.parse_commit(objs.read_object(repo, head)[1]).tree
4747
other_tree = objs.parse_commit(objs.read_object(repo, other)[1]).tree
4848
from .sequencer import _apply_patch
49-
new_tree, conflicts, conflict_idx = _apply_patch(repo, base_tree, other_tree, head_tree)
49+
new_tree, conflicts, conflict_idx = _apply_patch(
50+
repo,
51+
base_tree,
52+
other_tree,
53+
head_tree,
54+
ort_base=base,
55+
ort_ours="HEAD",
56+
ort_theirs=other_rev,
57+
)
5058
workdir.checkout_tree(repo, new_tree)
5159
if conflicts:
5260
if conflict_idx is not None:

pythongit/sequencer.py

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,24 @@ def _apply_side_renames(
9393
other[new] = other.pop(old)
9494

9595

96+
def _note_rerere_conflicts(repo: Repository, tree: str, paths: list[str]) -> None:
    """Pass conflicted blobs of *tree* to rerere.

    For each path in *paths*, the entry is looked up in *tree*. Missing
    entries, directories, gitlinks, unreadable objects and non-blob objects
    are skipped. A blob is decoded as UTF-8 (undecodable bytes replaced) and
    handed to ``rerere.note_conflict`` only when it contains ``<<<<<<<``.
    """
    from . import rerere as _rr

    for path in paths:
        entry = workdir.tree_path_entry(repo, tree, path)
        if entry is None:
            continue
        if entry.is_dir() or entry.is_gitlink():
            continue
        try:
            kind, payload = objs.read_object(repo, entry.sha)
        except KeyError:
            continue
        if kind == "blob":
            text = payload.decode("utf-8", errors="replace")
            if "<<<<<<<" in text:
                _rr.note_conflict(repo, path, text)
112+
113+
96114
def _make_commit(repo: Repository, tree: str, parents: list[str], message: str,
97115
author: Optional[str] = None) -> str:
98116
name, email = repo.user()
@@ -108,15 +126,36 @@ def _make_commit(repo: Repository, tree: str, parents: list[str], message: str,
108126
return objs.write_object(repo, "commit", c.encode())
109127

110128

111-
def _apply_patch(repo: Repository, base_tree: str, target_tree: str,
112-
head_tree: str):
129+
def _apply_patch(
130+
repo: Repository,
131+
base_tree: str,
132+
target_tree: str,
133+
head_tree: str,
134+
*,
135+
ort_base: Optional[str] = None,
136+
ort_ours: Optional[str] = None,
137+
ort_theirs: Optional[str] = None,
138+
):
113139
"""Three-way merge head_tree with target_tree using base_tree as base.
114140
115-
Returns (new_tree_sha, conflicted_paths). When a path conflicts, the
116-
index records stages 1 (base), 2 (ours), 3 (theirs) instead of a
117-
single stage-0 entry; the merged-with-markers content is written to
118-
both worktree and used to build the returned tree placeholder.
141+
Returns (new_tree_sha, conflicted_paths, conflict_index). When a path
142+
conflicts, the conflict index records stages 1 (base), 2 (ours), 3
143+
(theirs); the merged-with-markers content is kept in the returned tree for
144+
checkout into the worktree.
119145
"""
146+
from . import ort as ort_mod
147+
148+
ort_result = ort_mod.merge_tree(
149+
repo,
150+
ort_base or base_tree,
151+
ort_ours or head_tree,
152+
ort_theirs or target_tree,
153+
)
154+
if ort_result is not None:
155+
if ort_result.conflicts:
156+
_note_rerere_conflicts(repo, ort_result.tree, ort_result.conflicts)
157+
return ort_result.tree, ort_result.conflicts, ort_result.conflict_index
158+
120159
base_orig = _tree_blobs(repo, base_tree)
121160
target_orig = _tree_blobs(repo, target_tree)
122161
head_orig = _tree_blobs(repo, head_tree)
@@ -223,7 +262,15 @@ def cherry_pick(repo: Repository, target_sha: str) -> tuple[Optional[str], list[
223262
if not head_sha:
224263
raise ValueError("no HEAD")
225264
head_tree = _commit_obj(repo, head_sha).tree
226-
new_tree, conflicts, conflict_idx = _apply_patch(repo, base_tree, target.tree, head_tree)
265+
new_tree, conflicts, conflict_idx = _apply_patch(
266+
repo,
267+
base_tree,
268+
target.tree,
269+
head_tree,
270+
ort_base=target.parents[0],
271+
ort_ours="HEAD",
272+
ort_theirs=target_sha,
273+
)
227274
if conflicts:
228275
# leave merged-with-markers in workdir, do not commit
229276
workdir.checkout_tree(repo, new_tree)
@@ -252,7 +299,15 @@ def revert(repo: Repository, target_sha: str) -> tuple[Optional[str], list[str]]
252299
if not head_sha:
253300
raise ValueError("no HEAD")
254301
head_tree = _commit_obj(repo, head_sha).tree
255-
new_tree, conflicts, conflict_idx = _apply_patch(repo, base_tree, new_target_tree, head_tree)
302+
new_tree, conflicts, conflict_idx = _apply_patch(
303+
repo,
304+
base_tree,
305+
new_target_tree,
306+
head_tree,
307+
ort_base=target_sha,
308+
ort_ours="HEAD",
309+
ort_theirs=target.parents[0],
310+
)
256311
if conflicts:
257312
workdir.checkout_tree(repo, new_tree)
258313
if conflict_idx is not None:

0 commit comments

Comments
 (0)