Skip to content

Commit 57bc9da

Browse files
committed
patch_parse: implement state machine for parsing patch headers
Our code parsing Git patch headers is rather lax in parsing headers of a Git-style patch. Most notably, we do not care for the exact order in which header lines appear and as such, we may parse patch files which are not really valid after all. Furthermore, the state transitions inside of the parser are not as obvious as they could be, making it harder than required to follow its logic. To improve upon this situation, this patch introduces a real state machine to parse the patches. Instead of simply parsing each line without caring for previous state and the exact ordering, we define a set of states with their allowed transitions. This makes the patch parser more strict in only allowing valid successions of header lines. As the transition table is defined inside of a single structure with the expected line, required state as well as the state that we end up in, all state transitions are immediately obvious from just having a look at this structure. This improves both maintainability and eases reasoning about the patch parser.
1 parent 0a93ded commit 57bc9da

File tree

1 file changed

+81
-46
lines changed

1 file changed

+81
-46
lines changed

src/patch_parse.c

Lines changed: 81 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -372,31 +372,73 @@ static int parse_header_dissimilarity(
372372
return 0;
373373
}
374374

375+
static int parse_header_start(git_patch_parsed *patch, git_patch_parse_ctx *ctx)
376+
{
377+
if (parse_header_path(&patch->header_old_path, ctx) < 0)
378+
return parse_err("corrupt old path in git diff header at line %"PRIuZ,
379+
ctx->line_num);
380+
381+
if (parse_advance_ws(ctx) < 0 ||
382+
parse_header_path(&patch->header_new_path, ctx) < 0)
383+
return parse_err("corrupt new path in git diff header at line %"PRIuZ,
384+
ctx->line_num);
385+
386+
return 0;
387+
}
388+
389+
typedef enum {
390+
STATE_START,
391+
392+
STATE_DIFF,
393+
STATE_FILEMODE,
394+
STATE_MODE,
395+
STATE_INDEX,
396+
STATE_PATH,
397+
398+
STATE_SIMILARITY,
399+
STATE_RENAME,
400+
STATE_COPY,
401+
402+
STATE_END,
403+
} parse_header_state;
404+
375405
typedef struct {
376406
const char *str;
407+
parse_header_state expected_state;
408+
parse_header_state next_state;
377409
int(*fn)(git_patch_parsed *, git_patch_parse_ctx *);
378-
} header_git_op;
379-
380-
static const header_git_op header_git_ops[] = {
381-
{ "diff --git ", NULL },
382-
{ "@@ -", NULL },
383-
{ "GIT binary patch", NULL },
384-
{ "Binary files ", NULL },
385-
{ "--- ", parse_header_git_oldpath },
386-
{ "+++ ", parse_header_git_newpath },
387-
{ "index ", parse_header_git_index },
388-
{ "old mode ", parse_header_git_oldmode },
389-
{ "new mode ", parse_header_git_newmode },
390-
{ "deleted file mode ", parse_header_git_deletedfilemode },
391-
{ "new file mode ", parse_header_git_newfilemode },
392-
{ "rename from ", parse_header_renamefrom },
393-
{ "rename to ", parse_header_renameto },
394-
{ "rename old ", parse_header_renamefrom },
395-
{ "rename new ", parse_header_renameto },
396-
{ "copy from ", parse_header_copyfrom },
397-
{ "copy to ", parse_header_copyto },
398-
{ "similarity index ", parse_header_similarity },
399-
{ "dissimilarity index ", parse_header_dissimilarity },
410+
} parse_header_transition;
411+
412+
static const parse_header_transition transitions[] = {
413+
/* Start */
414+
{ "diff --git " , STATE_START, STATE_DIFF, parse_header_start },
415+
416+
{ "deleted file mode " , STATE_DIFF, STATE_FILEMODE, parse_header_git_deletedfilemode },
417+
{ "new file mode " , STATE_DIFF, STATE_FILEMODE, parse_header_git_newfilemode },
418+
{ "old mode " , STATE_DIFF, STATE_MODE, parse_header_git_oldmode },
419+
{ "new mode " , STATE_MODE, STATE_END, parse_header_git_newmode },
420+
421+
{ "index " , STATE_FILEMODE, STATE_INDEX, parse_header_git_index },
422+
{ "index " , STATE_DIFF, STATE_INDEX, parse_header_git_index },
423+
{ "index " , STATE_END, STATE_INDEX, parse_header_git_index },
424+
425+
{ "--- " , STATE_INDEX, STATE_PATH, parse_header_git_oldpath },
426+
{ "+++ " , STATE_PATH, STATE_END, parse_header_git_newpath },
427+
{ "GIT binary patch" , STATE_INDEX, STATE_END, NULL },
428+
{ "Binary files " , STATE_INDEX, STATE_END, NULL },
429+
430+
{ "similarity index " , STATE_DIFF, STATE_SIMILARITY, parse_header_similarity },
431+
{ "dissimilarity index ", STATE_DIFF, STATE_SIMILARITY, parse_header_dissimilarity },
432+
{ "rename from " , STATE_SIMILARITY, STATE_RENAME, parse_header_renamefrom },
433+
{ "rename old " , STATE_SIMILARITY, STATE_RENAME, parse_header_renamefrom },
434+
{ "copy from " , STATE_SIMILARITY, STATE_COPY, parse_header_copyfrom },
435+
{ "rename to " , STATE_RENAME, STATE_END, parse_header_renameto },
436+
{ "rename new " , STATE_RENAME, STATE_END, parse_header_renameto },
437+
{ "copy to " , STATE_COPY, STATE_END, parse_header_copyto },
438+
439+
/* Next patch */
440+
{ "diff --git " , STATE_END, 0, NULL },
441+
{ "@@ -" , STATE_END, 0, NULL },
400442
};
401443

402444
static int parse_header_git(
@@ -405,44 +447,32 @@ static int parse_header_git(
405447
{
406448
size_t i;
407449
int error = 0;
408-
409-
/* Parse the diff --git line */
410-
if (parse_advance_expected_str(ctx, "diff --git ") < 0)
411-
return parse_err("corrupt git diff header at line %"PRIuZ, ctx->line_num);
412-
413-
if (parse_header_path(&patch->header_old_path, ctx) < 0)
414-
return parse_err("corrupt old path in git diff header at line %"PRIuZ,
415-
ctx->line_num);
416-
417-
if (parse_advance_ws(ctx) < 0 ||
418-
parse_header_path(&patch->header_new_path, ctx) < 0)
419-
return parse_err("corrupt new path in git diff header at line %"PRIuZ,
420-
ctx->line_num);
450+
parse_header_state state = STATE_START;
421451

422452
/* Parse remaining header lines */
423-
for (parse_advance_line(ctx);
424-
ctx->remain_len > 0;
425-
parse_advance_line(ctx)) {
426-
453+
for (; ctx->remain_len > 0; parse_advance_line(ctx)) {
427454
bool found = false;
428455

429456
if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n')
430457
break;
431458

432-
for (i = 0; i < ARRAY_SIZE(header_git_ops); i++) {
433-
const header_git_op *op = &header_git_ops[i];
434-
size_t op_len = strlen(op->str);
459+
for (i = 0; i < ARRAY_SIZE(transitions); i++) {
460+
const parse_header_transition *transition = &transitions[i];
461+
size_t op_len = strlen(transition->str);
435462

436-
if (memcmp(ctx->line, op->str, min(op_len, ctx->line_len)) != 0)
463+
if (transition->expected_state != state ||
464+
memcmp(ctx->line, transition->str, min(op_len, ctx->line_len)) != 0)
437465
continue;
438466

467+
state = transition->next_state;
468+
439469
/* Do not advance if this is the patch separator */
440-
if (op->fn == NULL)
470+
if (transition->fn == NULL)
441471
goto done;
442472

443473
parse_advance_chars(ctx, op_len);
444474

445-
if ((error = op->fn(patch, ctx)) < 0)
475+
if ((error = transition->fn(patch, ctx)) < 0)
446476
goto done;
447477

448478
parse_advance_ws(ctx);
@@ -456,14 +486,19 @@ static int parse_header_git(
456486
found = true;
457487
break;
458488
}
459-
489+
460490
if (!found) {
461491
error = parse_err("invalid patch header at line %"PRIuZ,
462492
ctx->line_num);
463493
goto done;
464494
}
465495
}
466496

497+
if (state != STATE_END) {
498+
error = parse_err("unexpected header line %"PRIuZ, ctx->line_num);
499+
goto done;
500+
}
501+
467502
done:
468503
return error;
469504
}

0 commit comments

Comments
 (0)