From 56fe3adeacfb33e86547c0ca340891b0fa2e5600 Mon Sep 17 00:00:00 2001
From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com>
Date: Mon, 18 May 2026 15:45:20 +0100
Subject: [PATCH] tooling(codemod): instrumented parser + #{ record-migration codemod
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Branch-only tooling for affinescript#218 (do NOT merge into main — the
Codemod_hook instrumentation is for the migration sweep only).

lib/codemod_hook.ml records every record-literal LBRACE byte offset;
parser.mly ExprRecord productions call Codemod_hook.note; codemod/ exe
inserts '#' at exactly those offsets (`{`->`#{`, `Foo {`->`Foo #{`).
Safety: refuses to rewrite unless every recorded offset is really '{'.
Hardened to catch per-file parser exceptions so one bad file can't
abort the sweep.

Known limitation (recorded on #218): Affinescript.Parse.parse_file hits
a pre-existing Match_failure (parse.ml:49) on stdlib + ~95/261 .affine
that the 257 AOT harness parses fine — fix that parse-API bug (next)
before the stdlib sweep.

Refs #218

Co-Authored-By: Claude Opus 4.7 (1M context)
---
Review note: codemod/codemod.ml below was revised in review (channels are
closed on error, per-file I/O failures skip instead of aborting the sweep,
single-pass '#' insertion, --check accepted in any argument position).
Its index blob id is carried over from the pre-review patch and is stale,
and the whitespace of the diff context lines was reconstructed; regenerate
with git format-patch before applying.

 codemod/codemod.ml  | 126 +++++++++++++++++++++++++++++++++++++++++++++
 codemod/dune        |   3 ++
 lib/codemod_hook.ml |   8 +++
 lib/dune            |   1 +
 lib/parser.mly      |   8 +--
 5 files changed, 142 insertions(+), 4 deletions(-)
 create mode 100644 codemod/codemod.ml
 create mode 100644 codemod/dune
 create mode 100644 lib/codemod_hook.ml

diff --git a/codemod/codemod.ml b/codemod/codemod.ml
new file mode 100644
index 00000000..f572be9a
--- /dev/null
+++ b/codemod/codemod.ml
@@ -0,0 +1,126 @@
+(* #{ record-syntax codemod (affinescript#218). Parses each .affine with
+   the instrumented origin/main grammar, which records the byte offset of
+   every record-literal LBRACE in Affinescript.Codemod_hook.brace_offsets.
+   We then insert '#' immediately before each such '{', so `{` -> `#{` and
+   `Foo {` -> `Foo #{`. Only true record literals are touched.
+
+   Usage:
+     codemod --check FILE...   # report offsets, write nothing
+     codemod FILE...           # rewrite in place (only if it parses &
+                               # every offset really points at '{')
+
+   --check is honoured wherever it appears among the arguments. *)
+
+(* [read_file path] returns the whole of [path]. Binary mode avoids any
+   newline translation, since the recorded offsets are byte positions.
+   The channel is closed even if reading raises. *)
+let read_file path =
+  let ic = open_in_bin path in
+  Fun.protect
+    ~finally:(fun () -> close_in_noerr ic)
+    (fun () -> really_input_string ic (in_channel_length ic))
+
+(* [write_file path s] overwrites [path] with [s]. The explicit
+   [close_out] surfaces flush errors; [finally] only prevents a leaked
+   descriptor when writing raises. *)
+let write_file path s =
+  let oc = open_out_bin path in
+  Fun.protect
+    ~finally:(fun () -> close_out_noerr oc)
+    (fun () ->
+      output_string oc s;
+      close_out oc)
+
+(* [insert_hashes src offsets] returns [src] with '#' inserted before the
+   byte at each offset. [offsets] must be strictly ascending and within
+   [src]. One pass over [src] rather than one string copy per offset. *)
+let insert_hashes src offsets =
+  let len = String.length src in
+  let buf = Buffer.create (len + List.length offsets) in
+  let rest =
+    List.fold_left
+      (fun from o ->
+        Buffer.add_substring buf src from (o - from);
+        Buffer.add_char buf '#';
+        o)
+      0 offsets
+  in
+  Buffer.add_substring buf src rest (len - rest);
+  Buffer.contents buf
+
+(* [process ~check path] parses [path] with the instrumented parser, then
+   reports the recorded record-literal offsets ([check = true]) or
+   rewrites [path] in place. Returns [true] if the file was handled and
+   [false] if it was skipped: parse failure, I/O failure, or a recorded
+   offset that is not a '{'. Per-file failures are reported on stderr and
+   never raised, so one bad file cannot abort the sweep. *)
+let process ~check path =
+  (* The hook accumulates across parses; clear it so offsets from the
+     previous file cannot leak into this one. *)
+  Affinescript.Codemod_hook.brace_offsets := [];
+  match
+    (* Catch-all is deliberate: the parse API is known to raise (e.g.
+       Match_failure) on some inputs, and that must only skip the file. *)
+    (try Affinescript.Parse.parse_file path
+     with e -> Error (Printexc.to_string e, Affinescript.Span.dummy))
+  with
+  | Error (msg, _) ->
+    Printf.eprintf "SKIP %s -- parse error: %s\n" path msg;
+    false
+  | Ok _ ->
+    let offsets =
+      !Affinescript.Codemod_hook.brace_offsets
+      |> List.sort_uniq (fun a b -> compare b a) (* descending, deduped *)
+    in
+    (match read_file path with
+     | exception (Sys_error _ | End_of_file as e) ->
+       Printf.eprintf "SKIP %s -- cannot read: %s\n" path
+         (Printexc.to_string e);
+       false
+     | src ->
+       let len = String.length src in
+       let bad =
+         List.find_opt
+           (fun o -> o < 0 || o >= len || src.[o] <> '{') offsets
+       in
+       (match bad with
+        | Some o ->
+          Printf.eprintf
+            "SKIP %s -- offset %d is not '{'; not rewriting\n" path o;
+          false
+        | None ->
+          if check then begin
+            Printf.printf "%s: %d record literal(s) at %s\n" path
+              (List.length offsets)
+              (String.concat "," (List.map string_of_int offsets));
+            true
+          end else if offsets = [] then begin
+            Printf.printf "OK %s -- no record literals\n" path;
+            true
+          end else begin
+            (* [offsets] is descending; the single-pass insert needs
+               ascending order. *)
+            let out = insert_hashes src (List.rev offsets) in
+            match write_file path out with
+            | exception Sys_error msg ->
+              Printf.eprintf "SKIP %s -- cannot write: %s\n" path msg;
+              false
+            | () ->
+              Printf.printf "REWROTE %s -- %d record literal(s)\n"
+                path (List.length offsets);
+              true
+          end))
+
+(* Entry point. [--check] is accepted anywhere among the arguments:
+   recognising it only in first position would treat it as a file name
+   and rewrite the real files on [codemod FILE --check]. *)
+let () =
+  let args = Array.to_list Sys.argv |> List.tl in
+  let is_check_flag = String.equal "--check" in
+  let check = List.exists is_check_flag args in
+  let files = List.filter (fun a -> not (is_check_flag a)) args in
+  let ok = ref 0 and skip = ref 0 in
+  List.iter
+    (fun f -> if process ~check f then incr ok else incr skip)
+    files;
+  Printf.eprintf "done: %d processed, %d skipped\n" !ok !skip
diff --git a/codemod/dune b/codemod/dune
new file mode 100644
index 00000000..28a49ea7
--- /dev/null
+++ b/codemod/dune
@@ -0,0 +1,3 @@
+(executable
+ (name codemod)
+ (libraries affinescript))
diff --git a/lib/codemod_hook.ml b/lib/codemod_hook.ml
new file mode 100644
index 00000000..19bece3e
--- /dev/null
+++ b/lib/codemod_hook.ml
@@ -0,0 +1,8 @@
+(* CODEMOD INSTRUMENTATION — branch stage-c/codemod only, never merged
+   (affinescript#218). The instrumented parser calls [note] with the
+   byte position of every record-literal LBRACE it matches; the #{
+   migration codemod reads [brace_offsets] to insert '#' there. *)
+
+let brace_offsets : int list ref = ref []
+let note (p : Lexing.position) =
+  brace_offsets := p.Lexing.pos_cnum :: !brace_offsets
diff --git a/lib/dune b/lib/dune
index c22c1031..015e3d71 100644
--- a/lib/dune
+++ b/lib/dune
@@ -4,6 +4,7 @@
  (modes byte native)
  (modules
   ast
+  codemod_hook
   borrow
   c_codegen
   cafe_face
diff --git a/lib/parser.mly b/lib/parser.mly
index f020126c..48665531 100644
--- a/lib/parser.mly
+++ b/lib/parser.mly
@@ -771,8 +771,8 @@ expr_primary:
   /* Struct literal: `Point { x: v, y: w }`. Must come before the plain
      upper_ident production so Menhir shifts LBRACE rather than reducing
      upper_ident to ExprVar when the next token is LBRACE. */
-  | _ty = upper_ident LBRACE b = expr_record_body RBRACE
-    { ExprRecord { er_fields = fst b; er_spread = snd b } }
+  | _ty = upper_ident l = LBRACE b = expr_record_body RBRACE
+    { Codemod_hook.note $startpos(l); ignore l; ExprRecord { er_fields = fst b; er_spread = snd b } }
   | name = upper_ident { ExprVar (mk_ident name $startpos $endpos) }
   | ty = upper_ident COLONCOLON variant = upper_ident
     { ExprVariant (mk_ident ty $startpos(ty) $endpos(ty),
@@ -791,8 +791,8 @@ expr_primary:
      avoid the LALR(1) greedy-separator conflict that arises when a ROW_VAR
      spread like `..record` follows a COMMA that `separated_list` has
      already consumed expecting another record_field. */
-  | LBRACE b = expr_record_body RBRACE
-    { ExprRecord { er_fields = fst b; er_spread = snd b } }
+  | l = LBRACE b = expr_record_body RBRACE
+    { Codemod_hook.note $startpos(l); ignore l; ExprRecord { er_fields = fst b; er_spread = snd b } }
 
   /* Block */
   | blk = block { ExprBlock blk }