Skip to content

Commit 0393ecc

Browse files
authored
Merge pull request libgit2#4308 from pks-t/pks/header-state-machine
patch_parse: implement state machine for parsing patch headers
2 parents 5a061a2 + cc4c44a commit 0393ecc

File tree

2 files changed

+103
-46
lines changed

2 files changed

+103
-46
lines changed

src/patch_parse.c

Lines changed: 82 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -372,31 +372,74 @@ static int parse_header_dissimilarity(
372372
return 0;
373373
}
374374

375+
/*
 * Handler for the "diff --git " header line (see its entry in the
 * transitions table). Parses the old path into patch->header_old_path and
 * then the new path into patch->header_new_path.
 *
 * Returns 0 on success. On failure returns the value of parse_err(), whose
 * message says which of the two paths was corrupt and includes
 * ctx->line_num.
 */
static int parse_header_start(git_patch_parsed *patch, git_patch_parse_ctx *ctx)
376+
{
377+
/* The old path comes first on the line. */
if (parse_header_path(&patch->header_old_path, ctx) < 0)
378+
return parse_err("corrupt old path in git diff header at line %"PRIuZ,
379+
ctx->line_num);
380+
381+
/*
 * A failure from either parse_advance_ws() or the second path parse is
 * reported as a corrupt new path.
 */
if (parse_advance_ws(ctx) < 0 ||
382+
parse_header_path(&patch->header_new_path, ctx) < 0)
383+
return parse_err("corrupt new path in git diff header at line %"PRIuZ,
384+
ctx->line_num);
385+
386+
return 0;
387+
}
388+
389+
/*
 * States of the git patch header parser. parse_header_git() starts in
 * STATE_START and, once its line loop finishes, reports "unexpected header
 * line" unless the state is STATE_END. Which header line moves the parser
 * into which state is defined by the transitions table.
 */
typedef enum {
390+
/*
 * Initial state; only "diff --git " is accepted here. As the first
 * enumerator it has the value 0, which the "Next patch" table entries
 * use literally as their next_state.
 */
STATE_START,
391+
392+
/* Entered after the "diff --git " line. */
STATE_DIFF,
393+
/* Entered after "deleted file mode " or "new file mode ". */
STATE_FILEMODE,
394+
/* Entered after "old mode "; only "new mode " is accepted here. */
STATE_MODE,
395+
/* Entered after an "index " line. */
STATE_INDEX,
396+
/* Entered after "--- "; only "+++ " is accepted here. */
STATE_PATH,
397+
398+
/* Entered after "similarity index " or "dissimilarity index ". */
STATE_SIMILARITY,
399+
/* Entered after "rename from " / "rename old ". */
STATE_RENAME,
400+
/* Entered after "copy from ". */
STATE_COPY,
401+
402+
/*
 * Entered after a line that completes a header section ("new mode ",
 * "+++ ", the binary markers, "rename to " / "rename new ", "copy to ").
 * From here an "index " line or one of the "Next patch" markers is
 * accepted.
 */
STATE_END,
403+
} parse_header_state;
404+
375405
/*
 * One row of the header parser's transition table (parse_header_transition
 * on the added side of this diff; the removed header_git_op had only the
 * str and fn members).
 */
typedef struct {
376406
/* Prefix that parse_header_git() compares against the current line. */
const char *str;
407+
/* State the parser must currently be in for this row to be considered. */
parse_header_state expected_state;
408+
/* State assigned to the parser as soon as this row matches. */
parse_header_state next_state;
377409
/*
 * Handler called after the matched prefix has been skipped. When NULL,
 * parse_header_git() jumps to its exit without advancing past the line.
 */
int(*fn)(git_patch_parsed *, git_patch_parse_ctx *);
378-
} header_git_op;
379-
380-
static const header_git_op header_git_ops[] = {
381-
{ "diff --git ", NULL },
382-
{ "@@ -", NULL },
383-
{ "GIT binary patch", NULL },
384-
{ "Binary files ", NULL },
385-
{ "--- ", parse_header_git_oldpath },
386-
{ "+++ ", parse_header_git_newpath },
387-
{ "index ", parse_header_git_index },
388-
{ "old mode ", parse_header_git_oldmode },
389-
{ "new mode ", parse_header_git_newmode },
390-
{ "deleted file mode ", parse_header_git_deletedfilemode },
391-
{ "new file mode ", parse_header_git_newfilemode },
392-
{ "rename from ", parse_header_renamefrom },
393-
{ "rename to ", parse_header_renameto },
394-
{ "rename old ", parse_header_renamefrom },
395-
{ "rename new ", parse_header_renameto },
396-
{ "copy from ", parse_header_copyfrom },
397-
{ "copy to ", parse_header_copyto },
398-
{ "similarity index ", parse_header_similarity },
399-
{ "dissimilarity index ", parse_header_dissimilarity },
410+
} parse_header_transition;
411+
412+
/*
 * Transition table for the git patch header state machine. Each row is
 * { line prefix, state required, state entered on match, handler }.
 * parse_header_git() scans the rows for every header line and takes the
 * first one whose required state equals the current state and whose prefix
 * matches the line. If no row matches, the header is rejected as invalid.
 *
 * Rows with a NULL handler end header parsing: the parser jumps to its
 * exit without advancing past the matched line.
 *
 * NOTE(review): the matcher compares only min(strlen(prefix), line length)
 * bytes, so a line shorter than a prefix that agrees with it over its own
 * length also matches -- confirm this is intended.
 */
static const parse_header_transition transitions[] = {
413+
/* Start */
414+
{ "diff --git " , STATE_START, STATE_DIFF, parse_header_start },
415+
416+
/* File mode lines; "new mode " is only accepted right after "old mode ". */
{ "deleted file mode " , STATE_DIFF, STATE_FILEMODE, parse_header_git_deletedfilemode },
417+
{ "new file mode " , STATE_DIFF, STATE_FILEMODE, parse_header_git_newfilemode },
418+
{ "old mode " , STATE_DIFF, STATE_MODE, parse_header_git_oldmode },
419+
{ "new mode " , STATE_MODE, STATE_END, parse_header_git_newmode },
420+
421+
/*
 * The index line is accepted after a file mode line, directly after the
 * diff line, or after a completed section (STATE_END).
 */
{ "index " , STATE_FILEMODE, STATE_INDEX, parse_header_git_index },
422+
{ "index " , STATE_DIFF, STATE_INDEX, parse_header_git_index },
423+
{ "index " , STATE_END, STATE_INDEX, parse_header_git_index },
424+
425+
/* After the index line: old/new path lines, or a binary marker. */
{ "--- " , STATE_INDEX, STATE_PATH, parse_header_git_oldpath },
426+
{ "+++ " , STATE_PATH, STATE_END, parse_header_git_newpath },
427+
{ "GIT binary patch" , STATE_INDEX, STATE_END, NULL },
428+
{ "Binary files " , STATE_INDEX, STATE_END, NULL },
429+
430+
/*
 * Rename/copy section: a (dis)similarity line, then a "from" line, then
 * the "to" line of the same kind.
 */
{ "similarity index " , STATE_DIFF, STATE_SIMILARITY, parse_header_similarity },
431+
{ "dissimilarity index ", STATE_DIFF, STATE_SIMILARITY, parse_header_dissimilarity },
432+
{ "rename from " , STATE_SIMILARITY, STATE_RENAME, parse_header_renamefrom },
433+
{ "rename old " , STATE_SIMILARITY, STATE_RENAME, parse_header_renamefrom },
434+
{ "copy from " , STATE_SIMILARITY, STATE_COPY, parse_header_copyfrom },
435+
{ "rename to " , STATE_RENAME, STATE_END, parse_header_renameto },
436+
{ "rename new " , STATE_RENAME, STATE_END, parse_header_renameto },
437+
{ "copy to " , STATE_COPY, STATE_END, parse_header_copyto },
438+
439+
/* Next patch */
/*
 * Only accepted in STATE_END. The literal 0 used as next_state is the
 * value of STATE_START, the first enumerator of parse_header_state.
 */
440+
{ "diff --git " , STATE_END, 0, NULL },
441+
{ "@@ -" , STATE_END, 0, NULL },
442+
{ "-- " , STATE_END, 0, NULL },
400443
};
401444

402445
static int parse_header_git(
@@ -405,44 +448,32 @@ static int parse_header_git(
405448
{
406449
size_t i;
407450
int error = 0;
408-
409-
/* Parse the diff --git line */
410-
if (parse_advance_expected_str(ctx, "diff --git ") < 0)
411-
return parse_err("corrupt git diff header at line %"PRIuZ, ctx->line_num);
412-
413-
if (parse_header_path(&patch->header_old_path, ctx) < 0)
414-
return parse_err("corrupt old path in git diff header at line %"PRIuZ,
415-
ctx->line_num);
416-
417-
if (parse_advance_ws(ctx) < 0 ||
418-
parse_header_path(&patch->header_new_path, ctx) < 0)
419-
return parse_err("corrupt new path in git diff header at line %"PRIuZ,
420-
ctx->line_num);
451+
parse_header_state state = STATE_START;
421452

422453
/* Parse remaining header lines */
423-
for (parse_advance_line(ctx);
424-
ctx->remain_len > 0;
425-
parse_advance_line(ctx)) {
426-
454+
for (; ctx->remain_len > 0; parse_advance_line(ctx)) {
427455
bool found = false;
428456

429457
if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n')
430458
break;
431459

432-
for (i = 0; i < ARRAY_SIZE(header_git_ops); i++) {
433-
const header_git_op *op = &header_git_ops[i];
434-
size_t op_len = strlen(op->str);
460+
for (i = 0; i < ARRAY_SIZE(transitions); i++) {
461+
const parse_header_transition *transition = &transitions[i];
462+
size_t op_len = strlen(transition->str);
435463

436-
if (memcmp(ctx->line, op->str, min(op_len, ctx->line_len)) != 0)
464+
if (transition->expected_state != state ||
465+
memcmp(ctx->line, transition->str, min(op_len, ctx->line_len)) != 0)
437466
continue;
438467

468+
state = transition->next_state;
469+
439470
/* Do not advance if this is the patch separator */
440-
if (op->fn == NULL)
471+
if (transition->fn == NULL)
441472
goto done;
442473

443474
parse_advance_chars(ctx, op_len);
444475

445-
if ((error = op->fn(patch, ctx)) < 0)
476+
if ((error = transition->fn(patch, ctx)) < 0)
446477
goto done;
447478

448479
parse_advance_ws(ctx);
@@ -456,14 +487,19 @@ static int parse_header_git(
456487
found = true;
457488
break;
458489
}
459-
490+
460491
if (!found) {
461492
error = parse_err("invalid patch header at line %"PRIuZ,
462493
ctx->line_num);
463494
goto done;
464495
}
465496
}
466497

498+
if (state != STATE_END) {
499+
error = parse_err("unexpected header line %"PRIuZ, ctx->line_num);
500+
goto done;
501+
}
502+
467503
done:
468504
return error;
469505
}

tests/diff/parse.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,27 @@ static void test_parse_invalid_diff(const char *invalid_diff)
5757
git_buf_free(&buf);
5858
}
5959

60+
/*
 * Checks that git_diff_from_buffer() accepts a buffer whose only file entry
 * is an exact rename: the "diff --git" line is followed by a similarity
 * index line and the rename from/to lines, with no index, "---"/"+++" or
 * hunk lines, and then by the "-- " line and a version string.
 */
void test_diff_parse__exact_rename(void)
61+
{
62+
const char *content =
63+
"---\n"
64+
" old_name.c => new_name.c | 0\n"
65+
" 1 file changed, 0 insertions(+), 0 deletions(-)\n"
66+
" rename old_name.c => new_name.c (100%)\n"
67+
"\n"
68+
"diff --git a/old_name.c b/new_name.c\n"
69+
"similarity index 100%\n"
70+
"rename from old_name.c\n"
71+
"rename to new_name.c\n"
72+
"-- \n"
73+
"2.9.3\n";
74+
git_diff *diff;
75+
76+
/* Parsing must succeed; the resulting diff is only freed, not inspected. */
cl_git_pass(git_diff_from_buffer(
77+
&diff, content, strlen(content)));
78+
git_diff_free(diff);
79+
}
80+
6081
void test_diff_parse__invalid_patches_fails(void)
6182
{
6283
test_parse_invalid_diff(PATCH_CORRUPT_MISSING_NEW_FILE);

0 commit comments

Comments
 (0)