Skip to content

Commit 89a3482

Browse files
committed
diff: implement function to calculate patch ID
The upstream git project provides the ability to calculate a so-called patch ID. Quoting from git-patch-id(1): 'A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a patch, with whitespace and line numbers ignored.' Patch IDs can be used to identify two patches which are probably the same thing, e.g. when a patch has been cherry-picked to another branch. This commit implements a new function `git_diff_patchid`, which takes a diff and derives an OID from it. Note the different terminology here: a patch in libgit2 is the set of differences in a single file, and a diff can contain multiple patches for different files. The implementation matches the upstream implementation and should derive the same OID for the same diff. In fact, some code has been directly derived from the upstream implementation. The upstream implementation has two different modes to calculate patch IDs: the stable and the unstable mode. The old way of calculating patch IDs was unstable in the sense that a different ordering of the diffs led to different results. This oversight was fixed in git 1.9, but as git tries hard to never break existing workflows, the old and unstable way is still the default. The newer and stable way does not depend on the ordering of the diff hunks, and in fact it is the mode that should probably be used today. So right now, we only implement the stable way of generating the patch ID.
1 parent fa94875 commit 89a3482

File tree

5 files changed

+313
-1
lines changed

5 files changed

+313
-1
lines changed

include/git2/diff.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,6 +1400,51 @@ GIT_EXTERN(int) git_diff_format_email_init_options(
14001400
git_diff_format_email_options *opts,
14011401
unsigned int version);
14021402

1403+
/**
1404+
* Patch ID options structure
1405+
*
1406+
* Initialize with `GIT_DIFF_PATCHID_OPTIONS_INIT` macro to
1407+
* correctly set the default values and version.
1408+
*/
1409+
typedef struct git_diff_patchid_options {
1410+
unsigned int version;
1411+
} git_diff_patchid_options;
1412+
1413+
#define GIT_DIFF_PATCHID_OPTIONS_VERSION 1
1414+
#define GIT_DIFF_PATCHID_OPTIONS_INIT { GIT_DIFF_PATCHID_OPTIONS_VERSION }
1415+
1416+
/**
1417+
* Initialize `git_diff_patchid_options` structure.
1418+
*
1419+
* Initializes the structure with default values. Equivalent to
1420+
* creating an instance with `GIT_DIFF_PATCHID_OPTIONS_INIT`.
1421+
*/
1422+
GIT_EXTERN(int) git_diff_patchid_init_options(
1423+
git_diff_patchid_options *opts,
1424+
unsigned int version);
1425+
1426+
/**
1427+
* Calculate the patch ID for the given patch.
1428+
*
1429+
* Calculate a stable patch ID for the given patch by summing the
1430+
* hash of the file diffs, ignoring whitespace and line numbers.
1431+
* This can be used to derive whether two diffs are the same with
1432+
* a high probability.
1433+
*
1434+
* Currently, this function only calculates stable patch IDs, as
1435+
* defined in git-patch-id(1), and should in fact generate the
1436+
* same IDs as the upstream git project does.
1437+
*
1438+
* @param out Pointer where the calculated patch ID should be
1439+
* stored
1440+
* @param diff The diff to calculate the ID for
1441+
* @param opts Options for how to calculate the patch ID. This is
1442+
* intended for future changes, as currently no options are
1443+
* available.
1444+
* @return 0 on success, an error code otherwise.
1445+
*/
1446+
GIT_EXTERN(int) git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts);
1447+
14031448
GIT_END_DECL
14041449

14051450
/** @} */

src/diff.c

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919
/*
 * Set (VAL non-zero) or clear (VAL zero) the FLAG bits in DIFF->opts.flags.
 * Clearing masks with ~(FLAG) so that exactly the requested bits are removed
 * and all other bits are preserved.
 */
#define DIFF_FLAG_SET(DIFF,FLAG,VAL) (DIFF)->opts.flags = \
	(VAL) ? ((DIFF)->opts.flags | (FLAG)) : ((DIFF)->opts.flags & ~(FLAG))
2121

22+
/*
 * State shared by the git_diff_foreach() callbacks (file_cb, line_cb)
 * while git_diff_patchid() computes a patch ID.
 */
struct patch_id_args {
23+
/* running hash; fed by file_cb/line_cb, finalized and restarted by flush_hunk */
git_hash_ctx ctx;
24+
/* accumulated byte-wise sum of the digests produced by flush_hunk */
git_oid result;
25+
/* set to 1 before iteration; file_cb clears it after the first file */
int first_file;
26+
};
27+
2228
GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta)
2329
{
2430
const char *str = delta->old_file.path;
@@ -374,3 +380,141 @@ int git_diff_format_email_init_options(
374380
return 0;
375381
}
376382

383+
/*
 * Finalize the hash accumulated in `ctx`, re-initialize `ctx` for further
 * use, and add the resulting digest into `result`.
 *
 * The addition is done byte by byte, starting at id[0], carrying the
 * overflow of each byte into the next one.
 *
 * Returns 0 on success, or the negative error code reported by
 * git_hash_final() / git_hash_init().
 */
static int flush_hunk(git_oid *result, git_hash_ctx *ctx)
{
	git_oid digest;
	unsigned short acc = 0;
	int rc, pos;

	if ((rc = git_hash_final(&digest, ctx)) < 0)
		return rc;

	if ((rc = git_hash_init(ctx)) < 0)
		return rc;

	for (pos = 0; pos < GIT_OID_RAWSZ; pos++) {
		acc += result->id[pos] + digest.id[pos];
		result->id[pos] = acc;	/* keeps the low byte only */
		acc >>= 8;		/* remainder carries into the next byte */
	}

	return 0;
}
401+
402+
/*
 * Remove, in place, every character of `buf` for which git__isspace()
 * is true, then truncate the buffer to the number of characters kept.
 *
 * Scanning stops at the first NUL byte found in buf->ptr.
 */
static void strip_spaces(git_buf *buf)
{
	const char *rd;
	char *wr = buf->ptr;
	size_t kept = 0;

	/* wr never runs ahead of rd, so compacting in place is safe */
	for (rd = buf->ptr; *rd != '\0'; rd++) {
		if (git__isspace(*rd))
			continue;

		*wr++ = *rd;
		kept++;
	}

	git_buf_truncate(buf, kept);
}
417+
418+
static int file_cb(
419+
const git_diff_delta *delta,
420+
float progress,
421+
void *payload)
422+
{
423+
struct patch_id_args *args = (struct patch_id_args *) payload;
424+
git_buf buf = GIT_BUF_INIT;
425+
int error;
426+
427+
GIT_UNUSED(progress);
428+
429+
if (!args->first_file &&
430+
(error = flush_hunk(&args->result, &args->ctx)) < 0)
431+
goto out;
432+
args->first_file = 0;
433+
434+
if ((error = git_buf_printf(&buf,
435+
"diff--gita/%sb/%s---a/%s+++b/%s",
436+
delta->old_file.path,
437+
delta->new_file.path,
438+
delta->old_file.path,
439+
delta->new_file.path)) < 0)
440+
goto out;
441+
442+
strip_spaces(&buf);
443+
444+
if ((error = git_hash_update(&args->ctx, buf.ptr, buf.size)) < 0)
445+
goto out;
446+
447+
out:
448+
git_buf_free(&buf);
449+
return error;
450+
}
451+
452+
static int line_cb(
453+
const git_diff_delta *delta,
454+
const git_diff_hunk *hunk,
455+
const git_diff_line *line,
456+
void *payload)
457+
{
458+
struct patch_id_args *args = (struct patch_id_args *) payload;
459+
git_buf buf = GIT_BUF_INIT;
460+
int error;
461+
462+
GIT_UNUSED(delta);
463+
GIT_UNUSED(hunk);
464+
465+
switch (line->origin) {
466+
case GIT_DIFF_LINE_ADDITION:
467+
git_buf_putc(&buf, '+');
468+
break;
469+
case GIT_DIFF_LINE_DELETION:
470+
git_buf_putc(&buf, '-');
471+
break;
472+
case GIT_DIFF_LINE_CONTEXT:
473+
break;
474+
default:
475+
giterr_set(GITERR_PATCH, "invalid line origin for patch");
476+
return -1;
477+
}
478+
479+
git_buf_put(&buf, line->content, line->content_len);
480+
strip_spaces(&buf);
481+
482+
if ((error = git_hash_update(&args->ctx, buf.ptr, buf.size)) < 0)
483+
goto out;
484+
485+
out:
486+
git_buf_free(&buf);
487+
return error;
488+
}
489+
490+
/*
 * Initialize `opts` from the GIT_DIFF_PATCHID_OPTIONS_INIT template for
 * the requested structure `version`.
 *
 * NOTE(review): any version validation and early error return happens
 * inside GIT_INIT_STRUCTURE_FROM_TEMPLATE, which is defined elsewhere --
 * confirm its failure behavior there. Returns 0 once the macro completes.
 */
int git_diff_patchid_init_options(git_diff_patchid_options *opts, unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(opts, version,
		git_diff_patchid_options, GIT_DIFF_PATCHID_OPTIONS_INIT);

	return 0;
}
496+
497+
/*
 * Calculate the patch ID of `diff` and store it in `out`.
 *
 * Each file's header and lines are hashed with whitespace stripped (see
 * file_cb and line_cb); the per-file digests are summed into a single
 * OID by flush_hunk.
 *
 * `opts` is checked against GIT_DIFF_PATCHID_OPTIONS_VERSION before any
 * work is done (NOTE(review): GITERR_CHECK_VERSION is defined elsewhere
 * and presumably returns early on a mismatch -- confirm).
 *
 * Returns 0 on success, a negative error code otherwise. `out` is only
 * written on success.
 */
int git_diff_patchid(git_oid *out, git_diff *diff, git_diff_patchid_options *opts)
{
	struct patch_id_args args;
	int error;

	GITERR_CHECK_VERSION(
		opts, GIT_DIFF_PATCHID_OPTIONS_VERSION, "git_diff_patchid_options");

	memset(&args, 0, sizeof(args));
	args.first_file = 1;

	/* nothing has been acquired yet, so a failed init needs no cleanup */
	if ((error = git_hash_ctx_init(&args.ctx)) < 0)
		return error;

	if ((error = git_diff_foreach(diff, file_cb, NULL, NULL, line_cb, &args)) < 0)
		goto out;

	/* fold the hash of the last file into the result */
	if ((error = flush_hunk(&args.result, &args.ctx)) < 0)
		goto out;

	git_oid_cpy(out, &args.result);

out:
	/* release the hash context on success and error paths alike */
	git_hash_ctx_cleanup(&args.ctx);
	return error;
}

tests/core/structinit.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,4 +176,8 @@ void test_core_structinit__compare(void)
176176
CHECK_MACRO_FUNC_INIT_EQUAL( \
177177
git_proxy_options, GIT_PROXY_OPTIONS_VERSION, \
178178
GIT_PROXY_OPTIONS_INIT, git_proxy_init_options);
179+
180+
CHECK_MACRO_FUNC_INIT_EQUAL( \
181+
git_diff_patchid_options, GIT_DIFF_PATCHID_OPTIONS_VERSION, \
182+
GIT_DIFF_PATCHID_OPTIONS_INIT, git_diff_patchid_init_options);
179183
}

tests/diff/patchid.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#include "clar_libgit2.h"
2+
#include "patch/patch_common.h"
3+
4+
static void verify_patch_id(const char *diff_content, const char *expected_id)
5+
{
6+
git_oid expected_oid, actual_oid;
7+
git_diff *diff;
8+
9+
cl_git_pass(git_oid_fromstr(&expected_oid, expected_id));
10+
cl_git_pass(git_diff_from_buffer(&diff, diff_content, strlen(diff_content)));
11+
cl_git_pass(git_diff_patchid(&actual_oid, diff, NULL));
12+
13+
cl_assert_equal_oid(&expected_oid, &actual_oid);
14+
15+
git_diff_free(diff);
16+
}
17+
18+
void test_diff_patchid__simple_commit(void)
19+
{
20+
verify_patch_id(PATCH_SIMPLE_COMMIT, "06094b1948b878b7d9ff7560b4eae672a014b0ec");
21+
}
22+
23+
void test_diff_patchid__filename_with_spaces(void)
24+
{
25+
verify_patch_id(PATCH_APPEND_NO_NL, "f0ba05413beaef743b630e796153839462ee477a");
26+
}
27+
28+
void test_diff_patchid__multiple_hunks(void)
29+
{
30+
verify_patch_id(PATCH_MULTIPLE_HUNKS, "81e26c34643d17f521e57c483a6a637e18ba1f57");
31+
}
32+
33+
void test_diff_patchid__multiple_files(void)
34+
{
35+
verify_patch_id(PATCH_MULTIPLE_FILES, "192d1f49d23f2004517963aecd3f8a6c467f50ff");
36+
}
37+
38+
void test_diff_patchid__same_diff_with_differing_whitespace_has_same_id(void)
39+
{
40+
const char *tabs =
41+
"diff --git a/file.txt b/file.txt\n"
42+
"index 8fecc09..1d43a92 100644\n"
43+
"--- a/file.txt\n"
44+
"+++ b/file.txt\n"
45+
"@@ -1 +1 @@\n"
46+
"-old text\n"
47+
"+ new text\n";
48+
const char *spaces =
49+
"diff --git a/file.txt b/file.txt\n"
50+
"index 8fecc09..1d43a92 100644\n"
51+
"--- a/file.txt\n"
52+
"+++ b/file.txt\n"
53+
"@@ -1 +1 @@\n"
54+
"-old text\n"
55+
"+ new text\n";
56+
const char *id = "11efdd13c30f7a1056eac2ae2fb952da475e2c23";
57+
58+
verify_patch_id(tabs, id);
59+
verify_patch_id(spaces, id);
60+
}

tests/patch/patch_common.h

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,66 @@
253253
"@@ -9,0 +10 @@ below it!\n" \
254254
"+insert at end\n"
255255

256-
/* An insertion at the beginning and end of file (and the resultant patch) */
256+
#define PATCH_SIMPLE_COMMIT \
257+
"commit 15e119375018fba121cf58e02a9f17fe22df0df8\n" \
258+
"Author: Edward Thomson <ethomson@edwardthomson.com>\n" \
259+
"Date: Wed Jun 14 13:31:20 2017 +0200\n" \
260+
"\n" \
261+
" CHANGELOG: document git_filter_init and GIT_FILTER_INIT\n" \
262+
"\n" \
263+
"diff --git a/CHANGELOG.md b/CHANGELOG.md\n" \
264+
"index 1b9e0c90a..24ecba426 100644\n" \
265+
"--- a/CHANGELOG.md\n" \
266+
"+++ b/CHANGELOG.md\n" \
267+
"@@ -96,6 +96,9 @@ v0.26\n" \
268+
" * `git_transport_smart_proxy_options()' enables you to get the proxy options for\n" \
269+
" smart transports.\n" \
270+
"\n" \
271+
"+* The `GIT_FILTER_INIT` macro and the `git_filter_init` function are provided\n" \
272+
"+ to initialize a `git_filter` structure.\n" \
273+
"+\n" \
274+
" ### Breaking API changes\n" \
275+
"\n" \
276+
" * `clone_checkout_strategy` has been removed from\n"
277+
278+
#define PATCH_MULTIPLE_HUNKS \
279+
"diff --git a/x b/x\n" \
280+
"index 0719398..fa0350c 100644\n" \
281+
"--- a/x\n" \
282+
"+++ b/x\n" \
283+
"@@ -1,5 +1,4 @@\n" \
284+
" 1\n" \
285+
"-2\n" \
286+
" 3\n" \
287+
" 4\n" \
288+
" 5\n" \
289+
"@@ -7,3 +6,4 @@\n" \
290+
" 7\n" \
291+
" 8\n" \
292+
" 9\n" \
293+
"+10\n"
294+
295+
#define PATCH_MULTIPLE_FILES \
296+
"diff --git a/x b/x\n" \
297+
"index 8a1218a..7059ba5 100644\n" \
298+
"--- a/x\n" \
299+
"+++ b/x\n" \
300+
"@@ -1,5 +1,4 @@\n" \
301+
" 1\n" \
302+
" 2\n" \
303+
"-3\n" \
304+
" 4\n" \
305+
" 5\n" \
306+
"diff --git a/y b/y\n" \
307+
"index e006065..9405325 100644\n" \
308+
"--- a/y\n" \
309+
"+++ b/y\n" \
310+
"@@ -1,4 +1,5 @@\n" \
311+
" a\n" \
312+
" b\n" \
313+
"+c\n" \
314+
" d\n" \
315+
" e\n"
257316

258317
#define FILE_PREPEND_AND_APPEND \
259318
"first and\n" \

0 commit comments

Comments
 (0)