From da8e421e0f4ba5732258c3e6b75f92c9026d39f7 Mon Sep 17 00:00:00 2001 From: matt rice Date: Sun, 14 Sep 2025 15:36:45 -0700 Subject: [PATCH 1/4] Add arrays to grmtools_section.test --- cfgrammar/src/lib/header.rs | 85 +++++++++++++++---------- lrlex/src/lib/ctbuilder.rs | 7 ++ lrpar/cttests/src/grmtools_section.test | 15 +++++ lrpar/src/lib/parser.rs | 7 ++ 4 files changed, 82 insertions(+), 32 deletions(-) diff --git a/cfgrammar/src/lib/header.rs b/cfgrammar/src/lib/header.rs index a0e729ac1..9886ede54 100644 --- a/cfgrammar/src/lib/header.rs +++ b/cfgrammar/src/lib/header.rs @@ -137,6 +137,8 @@ pub enum Setting { }, Num(u64, T), String(String, T), + // The two `T` values are for the spans of the open and close brackets `[`, and `]`. + Array(Vec>, T, T), } /// Parser for the `%grmtools` section @@ -157,42 +159,53 @@ pub enum Value { Setting(Setting), } +impl From> for Setting { + fn from(s: Setting) -> Setting { + match s { + Setting::Unitary(Namespaced { + namespace, + member: (m, ml), + }) => Setting::Unitary(Namespaced { + namespace: namespace.map(|(n, nl)| (n, nl.into())), + member: (m, ml.into()), + }), + Setting::Constructor { + ctor: + Namespaced { + namespace: ctor_ns, + member: (ctor_m, ctor_ml), + }, + arg: + Namespaced { + namespace: arg_ns, + member: (arg_m, arg_ml), + }, + } => Setting::Constructor { + ctor: Namespaced { + namespace: ctor_ns.map(|(ns, ns_l)| (ns, ns_l.into())), + member: (ctor_m, ctor_ml.into()), + }, + arg: Namespaced { + namespace: arg_ns.map(|(ns, ns_l)| (ns, ns_l.into())), + member: (arg_m, arg_ml.into()), + }, + }, + Setting::Num(num, num_loc) => Setting::Num(num, num_loc.into()), + Setting::String(s, str_loc) => Setting::String(s, str_loc.into()), + Setting::Array(mut xs, arr_open_loc, arr_close_loc) => Setting::Array( + xs.drain(..).map(|x| x.into()).collect(), + arr_open_loc.into(), + arr_close_loc.into(), + ), + } + } +} + impl From> for Value { fn from(v: Value) -> Value { match v { Value::Flag(flag, u) => Value::Flag(flag, u.into()), - Value::Setting(s) => Value::Setting(match s { - Setting::Unitary(Namespaced { - namespace, - member: (m, ml), - }) => Setting::Unitary(Namespaced { - namespace: namespace.map(|(n, nl)| (n, nl.into())), - member: (m, ml.into()), - }), - Setting::Constructor { - ctor: - Namespaced { - namespace: ctor_ns, - member: (ctor_m, ctor_ml), - }, - arg: - Namespaced { - namespace: arg_ns, - member: (arg_m, arg_ml), - }, - } => Setting::Constructor { - ctor: Namespaced { - namespace: ctor_ns.map(|(ns, ns_l)| (ns, ns_l.into())), - member: (ctor_m, ctor_ml.into()), - }, - arg: Namespaced { - namespace: arg_ns.map(|(ns, ns_l)| (ns, ns_l.into())), - member: (arg_m, arg_ml.into()), - }, - }, - Setting::Num(num, num_loc) => Setting::Num(num, num_loc.into()), - Setting::String(s, str_loc) => Setting::String(s, str_loc.into()), - }), + Value::Setting(s) => Value::Setting(s.into()), } } } @@ -215,6 +228,7 @@ impl Value { format!("'{member}'") } } + Value::Setting(Setting::Array(_, _, _)) => "array".to_string(), Value::Setting(Setting::Constructor { ctor: Namespaced { @@ -587,6 +601,13 @@ impl TryFrom<&Value> for YaccKind { ), locations: vec![loc.clone()], }), + Value::Setting(Setting::Array(_, loc, _)) => Err(HeaderError { + kind: HeaderErrorKind::ConversionError( + "From", + "Cannot convert array to YaccKind", + ), + locations: vec![loc.clone()], + }), Value::Setting(Setting::String(_, loc)) => Err(HeaderError { kind: HeaderErrorKind::ConversionError( "From", diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs index 9b3060b33..9f00a8d2b 100644 --- a/lrlex/src/lib/ctbuilder.rs +++ b/lrlex/src/lib/ctbuilder.rs @@ -90,6 +90,13 @@ impl TryFrom<&Value> for LexerKind { ), locations: vec![loc.clone()], }), + Value::Setting(Setting::Array(_, arr_loc, _)) => Err(HeaderError { + kind: HeaderErrorKind::ConversionError( + "LexerKind", + "Expected `LexerKind` found array", + ), + locations: vec![arr_loc.clone()], + }), Value::Setting(Setting::Unitary(Namespaced { namespace, member: (member, member_loc), diff --git a/lrpar/cttests/src/grmtools_section.test b/lrpar/cttests/src/grmtools_section.test index 0b5b807d9..3821c26ff 100644 --- a/lrpar/cttests/src/grmtools_section.test +++ b/lrpar/cttests/src/grmtools_section.test @@ -109,8 +109,21 @@ grammar: | Setting::String(s, string_span) } | namespaced '(' namespaced ')' { Setting::Constructor{ctor: $1, arg: $3} } + | '[' array_seq ']' { Setting::Array($2, $1.as_ref().unwrap().span(), $3.as_ref().unwrap().span()) } ; + array_seq -> Vec> + : %empty { Vec::new() } + | val { + let mut x = Vec::new(); + x.push($1); + x + } + | array_seq ',' val { + $1.push($3); + $1 + } + ; comma_opt -> () : %empty { } | ',' { } @@ -145,6 +158,8 @@ lexer: | \} '}' \( '(' \) ')' + \[ '[' + \] ']' :: '::' : ':' \"(\\.|[^"\\])*\" 'STRING' diff --git a/lrpar/src/lib/parser.rs b/lrpar/src/lib/parser.rs index 3faa817dc..7977cfd35 100644 --- a/lrpar/src/lib/parser.rs +++ b/lrpar/src/lib/parser.rs @@ -722,6 +722,13 @@ impl TryFrom<&Value> for RecoveryKind { ), locations: vec![arg_loc.clone()], }), + Value::Setting(Setting::Array(_, arr_loc, _)) => Err(HeaderError { + kind: HeaderErrorKind::ConversionError( + "RecoveryKind", + "Cannot convert array to RecoveryKind", + ), + locations: vec![arr_loc.clone()], + }), } } } From b5b22a2bb5b0dee69d22aa2f4eca66792e1cb908 Mon Sep 17 00:00:00 2001 From: matt rice Date: Sun, 14 Sep 2025 17:01:48 -0700 Subject: [PATCH 2/4] Add array values to `GrmtoolsSectionParser` and switch `test_files` 1. Split up the current `parse_value` function into `parse_key_value`, and `parse_setting` functions. 2. Add support for arrays to `parse_setting`. 3. Switch the top-level `test_files` support to expect an array. --- cfgrammar/src/lib/header.rs | 142 ++++++++++-------- doc/src/yaccextensions.md | 5 +- lrlex/src/lib/ctbuilder.rs | 70 ++++++--- lrpar/cttests/src/calc_input.test | 2 +- lrpar/cttests/src/ctfails/calc_bad_input.test | 2 +- lrpar/cttests/src/grmtools_section.test | 9 +- lrpar/cttests/src/lib.rs | 5 +- lrpar/examples/calc_actions/src/calc.y | 2 +- lrpar/examples/calc_ast/src/calc.y | 2 +- lrpar/examples/calc_ast_arena/src/calc.y | 2 +- lrpar/examples/calc_parsetree/src/calc.y | 2 +- lrpar/examples/clone_param/src/param.y | 2 +- lrpar/examples/start_states/src/comment.y | 2 +- lrpar/src/lib/parser.rs | 4 + 14 files changed, 161 insertions(+), 90 deletions(-) diff --git a/cfgrammar/src/lib/header.rs b/cfgrammar/src/lib/header.rs index 9886ede54..21e29fe27 100644 --- a/cfgrammar/src/lib/header.rs +++ b/cfgrammar/src/lib/header.rs @@ -295,7 +295,85 @@ fn add_duplicate_occurrence( } impl<'input> GrmtoolsSectionParser<'input> { - pub fn parse_value( + fn parse_setting(&'_ self, mut i: usize) -> Result<(Setting, usize), HeaderError> { + i = self.parse_ws(i); + match RE_DIGITS.find(&self.src[i..]) { + Some(m) => { + let num_span = Span::new(i + m.start(), i + m.end()); + let num_str = &self.src[num_span.start()..num_span.end()]; + // If the above regex matches we expect this to succeed. + let num = str::parse::(num_str).unwrap(); + let val = Setting::Num(num, num_span); + i = self.parse_ws(num_span.end()); + Ok((val, i)) + } + None => match RE_STRING.find(&self.src[i..]) { + Some(m) => { + let end = i + m.end(); + // Trim the leading and trailing quotes. + let str_span = Span::new(i + m.start() + 1, end - 1); + let str = &self.src[str_span.start()..str_span.end()]; + let setting = Setting::String(str.to_string(), str_span); + // After the trailing quotes. + i = self.parse_ws(end); + Ok((setting, i)) + } + None => { + if let Some(mut j) = self.lookahead_is("[", i) { + let mut vals = Vec::new(); + let open_pos = j; + + loop { + j = self.parse_ws(j); + if let Some(end_pos) = self.lookahead_is("]", j) { + return Ok(( + Setting::Array( + vals, + Span::new(i, open_pos), + Span::new(j, end_pos), + ), + end_pos, + )); + } + if let Ok((val, k)) = self.parse_setting(j) { + vals.push(val); + j = self.parse_ws(k); + } + if let Some(k) = self.lookahead_is(",", j) { + j = k + } + } + } else { + let (path_val, j) = self.parse_namespaced(i)?; + i = self.parse_ws(j); + if let Some(j) = self.lookahead_is("(", i) { + let (arg, j) = self.parse_namespaced(j)?; + i = self.parse_ws(j); + if let Some(j) = self.lookahead_is(")", i) { + i = self.parse_ws(j); + Ok(( + Setting::Constructor { + ctor: path_val, + arg, + }, + i, + )) + } else { + Err(HeaderError { + kind: HeaderErrorKind::ExpectedToken(')'), + locations: vec![Span::new(i, i)], + }) + } + } else { + Ok((Setting::Unitary(path_val), i)) + } + } + } + }, + } + } + + pub fn parse_key_value( &'_ self, mut i: usize, ) -> Result<(String, Span, Value, usize), HeaderError> { @@ -312,62 +390,8 @@ impl<'input> GrmtoolsSectionParser<'input> { let key_span = Span::new(i, j); i = self.parse_ws(j); if let Some(j) = self.lookahead_is(":", i) { - i = self.parse_ws(j); - match RE_DIGITS.find(&self.src[i..]) { - Some(m) => { - let num_span = Span::new(i + m.start(), i + m.end()); - let num_str = &self.src[num_span.start()..num_span.end()]; - // If the above regex matches we expect this to succeed. - let num = str::parse::(num_str).unwrap(); - let val = Setting::Num(num, num_span); - i = self.parse_ws(num_span.end()); - Ok((key_name, key_span, Value::Setting(val), i)) - } - None => match RE_STRING.find(&self.src[i..]) { - Some(m) => { - let end = i + m.end(); - // Trim the leading and trailing quotes. - let str_span = Span::new(i + m.start() + 1, end - 1); - let str = &self.src[str_span.start()..str_span.end()]; - let setting = Setting::String(str.to_string(), str_span); - // After the trailing quotes. - i = self.parse_ws(end); - Ok((key_name, key_span, Value::Setting(setting), i)) - } - None => { - let (path_val, j) = self.parse_namespaced(i)?; - i = self.parse_ws(j); - if let Some(j) = self.lookahead_is("(", i) { - let (arg, j) = self.parse_namespaced(j)?; - i = self.parse_ws(j); - if let Some(j) = self.lookahead_is(")", i) { - i = self.parse_ws(j); - Ok(( - key_name, - key_span, - Value::Setting(Setting::Constructor { - ctor: path_val, - arg, - }), - i, - )) - } else { - Err(HeaderError { - kind: HeaderErrorKind::ExpectedToken(')'), - locations: vec![Span::new(i, i)], - }) - } - } else { - Ok(( - key_name, - key_span, - Value::Setting(Setting::Unitary(path_val)), - i, - )) - } - } - }, - } + let (val, j) = self.parse_setting(j)?; + Ok((key_name, key_span, Value::Setting(val), j)) } else { Ok((key_name, key_span, Value::Flag(true, key_span), i)) } @@ -428,7 +452,7 @@ impl<'input> GrmtoolsSectionParser<'input> { if let Some(j) = self.lookahead_is("{", i) { i = self.parse_ws(j); while self.lookahead_is("}", i).is_none() && i < self.src.len() { - let (key, key_loc, val, j) = match self.parse_value(i) { + let (key, key_loc, val, j) = match self.parse_key_value(i) { Ok((key, key_loc, val, pos)) => (key, key_loc, val, pos), Err(e) => { errs.push(e); @@ -453,7 +477,7 @@ impl<'input> GrmtoolsSectionParser<'input> { i = self.parse_ws(j); continue; } else { - i = j; + i = self.parse_ws(j); break; } } diff --git a/doc/src/yaccextensions.md b/doc/src/yaccextensions.md index 2fdca72cf..07dbd432d 100644 --- a/doc/src/yaccextensions.md +++ b/doc/src/yaccextensions.md @@ -7,9 +7,10 @@ But a default can be set or forced by using a `YaccKindResolver`. |------------------|-------------------------------------------------|--------------| | `yacckind` | [YaccKind](yacccompatibility.md#yacckinds) | ✓ | | `recoverykind` | [RecoveryKind](errorrecovery.md#recoverykinds) | ✗ | -| `test_files`[^†] | String | ✗ | +| `test_files`[^†] | Array of string values | ✗ | -[^†]: String containing a glob relative to the yacc `.y` source file, experimental. +[^†]: Strings containing globs are resolved relative to the yacc `.y` source file. + `test_files` is currently experimental. ## Example diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs index 9f00a8d2b..61bd8fb76 100644 --- a/lrlex/src/lib/ctbuilder.rs +++ b/lrlex/src/lib/ctbuilder.rs @@ -545,29 +545,65 @@ where .collect::>(); closure_lexerdef.set_rule_ids(&owned_map); yacc_header.mark_used(&"test_files".to_string()); + let grammar = rtpb.grammar(); let test_glob = yacc_header.get("test_files"); + let mut err_str = None; + let add_error_line = |err_str: &mut Option, line| { + if let Some(err_str) = err_str { + err_str.push_str(&format!("{}\n", line)); + } else { + let _ = err_str.insert(format!("{}\n", line)); + } + }; match test_glob { - Some(HeaderValue(_, Value::Setting(Setting::String(test_files, _)))) => { - let path_joined = grm_path.parent().unwrap().join(test_files); - for path in - glob(&path_joined.to_string_lossy()).map_err(|e| e.to_string())? - { - let path = path?; - if let Some(ext) = path.extension() { - if let Some(ext) = ext.to_str() { - if ext.starts_with("grm") { - Err(ErrorString("test_files extensions beginning with `grm` are reserved.".into()))? + Some(HeaderValue(_, Value::Setting(Setting::Array(test_globs, _, _)))) => { + for setting in test_globs { + match setting { + Setting::String(test_files, _) => { + let path_joined = grm_path.parent().unwrap().join(test_files); + let path_str = &path_joined.to_string_lossy(); + let mut glob_paths = glob(path_str).map_err(|e| e.to_string())?.peekable(); + if glob_paths.peek().is_none() { + return Err(format!("'test_files' glob '{}' matched no paths", path_str) + .to_string() + .into(), + ); + } + + for path in glob_paths { + let path = path?; + if let Some(ext) = path.extension() { + if let Some(ext) = ext.to_str() { + if ext.starts_with("grm") { + add_error_line(&mut err_str, "test_files extensions beginning with `grm` are reserved.".into()); + } + } + } + let input = fs::read_to_string(&path)?; + let l: LRNonStreamingLexer = + closure_lexerdef.lexer(&input); + let errs = rtpb.parse_map(&l, &|_| (), &|_, _| ()).1; + if !errs.is_empty() { + add_error_line(&mut err_str, format!("While parsing {}:", path.display())); + for e in errs { + let e_pp = e.pp(&l, &|t| grammar.token_epp(t)); + let e_lines = e_pp.split("\n"); + for e in e_lines { + add_error_line(&mut err_str, format!("\t{}", e)); + } + } + } } } + _ => return Err("Invalid value for setting 'test_files'".into()), } - let input = fs::read_to_string(&path)?; - let l: LRNonStreamingLexer = - closure_lexerdef.lexer(&input); - for e in rtpb.parse_map(&l, &|_| (), &|_, _| ()).1 { - Err(format!("parsing {}: {}", path.display(), e))? - } } - Ok(()) + if let Some(err_str) = err_str { + Err(ErrorString(err_str))? + } else { + Ok(()) + } + } Some(_) => Err("Invalid value for setting 'test_files'".into()), None => Ok(()), diff --git a/lrpar/cttests/src/calc_input.test b/lrpar/cttests/src/calc_input.test index c731508d1..f21da5c1a 100644 --- a/lrpar/cttests/src/calc_input.test +++ b/lrpar/cttests/src/calc_input.test @@ -3,7 +3,7 @@ grammar: | %grmtools { yacckind: Original(YaccOriginalActionKind::UserAction), recoverer: RecoveryKind::None, - test_files: "*.calc_input" + test_files: ["*.calc_input"], } %start Expr %actiontype Result diff --git a/lrpar/cttests/src/ctfails/calc_bad_input.test b/lrpar/cttests/src/ctfails/calc_bad_input.test index 31deceb22..22c19ebd2 100644 --- a/lrpar/cttests/src/ctfails/calc_bad_input.test +++ b/lrpar/cttests/src/ctfails/calc_bad_input.test @@ -3,7 +3,7 @@ grammar: | %grmtools { yacckind: Original(YaccOriginalActionKind::UserAction), recoverer: RecoveryKind::None, - test_files: "*.bad_input" + test_files: ["*.bad_input"] } %start Expr %actiontype Result diff --git a/lrpar/cttests/src/grmtools_section.test b/lrpar/cttests/src/grmtools_section.test index 3821c26ff..616bc25c7 100644 --- a/lrpar/cttests/src/grmtools_section.test +++ b/lrpar/cttests/src/grmtools_section.test @@ -1,5 +1,9 @@ grammar: | - %grmtools{yacckind: Grmtools} + %grmtools{ + yacckind: Grmtools, + recoverer: RecoveryKind::CPCTPlus, + test_files: ["*.input_grmtools_section"] + } %token MAGIC IDENT NUM STRING %epp MAGIC "%grmtools" %% @@ -164,3 +168,6 @@ lexer: | : ':' \"(\\.|[^"\\])*\" 'STRING' \p{Pattern_White_Space} ; +extra_files: + test.input_grmtools_section: | + %grmtools{yacckind: Grmtools, !b, !a} diff --git a/lrpar/cttests/src/lib.rs b/lrpar/cttests/src/lib.rs index eb845b6c9..4bcacf56c 100644 --- a/lrpar/cttests/src/lib.rs +++ b/lrpar/cttests/src/lib.rs @@ -355,16 +355,15 @@ fn test_expect() { #[test] fn test_grmtools_section_files() { use glob::glob; - use std::env; use std::fs::File; use std::io::BufReader; use std::io::{BufRead as _, Read as _}; - let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let manifest_dir = env!("CARGO_MANIFEST_DIR"); let examples_glob = format!("{manifest_dir}/../examples/**"); let examples_l_glob = format!("{examples_glob}/*.l"); let examples_y_glob = format!("{examples_glob}/*.y"); - let out_dir = env::var("OUT_DIR").unwrap(); + let out_dir = env!("OUT_DIR"); let cttests_l_glob = format!("{out_dir}/*.l"); let cttests_y_glob = format!("{out_dir}/*.y"); let files = glob(&examples_l_glob) diff --git a/lrpar/examples/calc_actions/src/calc.y b/lrpar/examples/calc_actions/src/calc.y index 31311849f..8388ba2c7 100644 --- a/lrpar/examples/calc_actions/src/calc.y +++ b/lrpar/examples/calc_actions/src/calc.y @@ -1,6 +1,6 @@ %grmtools { yacckind: Grmtools, - test_files: "input*.txt", + test_files: ["input*.txt"], } %start Expr %avoid_insert "INT" diff --git a/lrpar/examples/calc_ast/src/calc.y b/lrpar/examples/calc_ast/src/calc.y index 35b68ce23..ad8bf48f1 100644 --- a/lrpar/examples/calc_ast/src/calc.y +++ b/lrpar/examples/calc_ast/src/calc.y @@ -1,6 +1,6 @@ %grmtools { yacckind: Grmtools, - test_files: "input*.txt", + test_files: ["input*.txt"], } %start Expr %avoid_insert "INT" diff --git a/lrpar/examples/calc_ast_arena/src/calc.y b/lrpar/examples/calc_ast_arena/src/calc.y index 2e2e05a3f..d2e77060d 100644 --- a/lrpar/examples/calc_ast_arena/src/calc.y +++ b/lrpar/examples/calc_ast_arena/src/calc.y @@ -1,6 +1,6 @@ %grmtools { yacckind: Grmtools, - test_files: "input*.txt", + test_files: ["input*.txt"], } %start Expr %avoid_insert "INT" diff --git a/lrpar/examples/calc_parsetree/src/calc.y b/lrpar/examples/calc_parsetree/src/calc.y index a024c1b9c..b8a1f34d2 100644 --- a/lrpar/examples/calc_parsetree/src/calc.y +++ b/lrpar/examples/calc_parsetree/src/calc.y @@ -1,6 +1,6 @@ %grmtools{ yacckind: Original(GenericParseTree), - test_files: "input*.txt", + test_files: ["input*.txt"], } %start Expr %avoid_insert "INT" diff --git a/lrpar/examples/clone_param/src/param.y b/lrpar/examples/clone_param/src/param.y index ac4bc7262..c3276c885 100644 --- a/lrpar/examples/clone_param/src/param.y +++ b/lrpar/examples/clone_param/src/param.y @@ -1,6 +1,6 @@ %grmtools { yacckind: Grmtools, - test_files: "input*.txt", + test_files: ["input*.txt"], } %expect-unused Unmatched "UNMATCHED" %token Incr Decr diff --git a/lrpar/examples/start_states/src/comment.y b/lrpar/examples/start_states/src/comment.y index 9f9cef8bb..2e72fd0db 100644 --- a/lrpar/examples/start_states/src/comment.y +++ b/lrpar/examples/start_states/src/comment.y @@ -1,6 +1,6 @@ %grmtools{ yacckind: Original(GenericParseTree), - test_files: "input*.txt", + test_files: ["input*.txt"], } %start Expr %% diff --git a/lrpar/src/lib/parser.rs b/lrpar/src/lib/parser.rs index 7977cfd35..2055402d8 100644 --- a/lrpar/src/lib/parser.rs +++ b/lrpar/src/lib/parser.rs @@ -1037,6 +1037,10 @@ where param, ) } + + pub fn grammar(&self) -> &YaccGrammar { + self.grm + } } /// After a parse error is encountered, the parser attempts to find a way of recovering. Each entry From 983085972398f06eb4f3bc0f5958acaf44e8324a Mon Sep 17 00:00:00 2001 From: matt rice Date: Tue, 21 Oct 2025 15:11:11 -0700 Subject: [PATCH 3/4] Additional cttest files for array based `test_files`. --- lrpar/cttests/src/calc_input.test | 4 +++- lrpar/cttests/src/ctfails/calc_bad_input.test | 5 +++-- lrpar/cttests/src/ctfails/test_files1.test | 16 ++++++++++++++++ lrpar/cttests/src/ctfails/test_files2.test | 16 ++++++++++++++++ lrpar/cttests/src/ctfails/test_files3.test | 16 ++++++++++++++++ 5 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 lrpar/cttests/src/ctfails/test_files1.test create mode 100644 lrpar/cttests/src/ctfails/test_files2.test create mode 100644 lrpar/cttests/src/ctfails/test_files3.test diff --git a/lrpar/cttests/src/calc_input.test b/lrpar/cttests/src/calc_input.test index f21da5c1a..b6dfa61cc 100644 --- a/lrpar/cttests/src/calc_input.test +++ b/lrpar/cttests/src/calc_input.test @@ -3,7 +3,7 @@ grammar: | %grmtools { yacckind: Original(YaccOriginalActionKind::UserAction), recoverer: RecoveryKind::None, - test_files: ["*.calc_input"], + test_files: ["*.calc_input", "*.calc_input2"], } %start Expr %actiontype Result @@ -46,4 +46,6 @@ extra_files: 1 + 2 * 3 input2.calc_input: | (1 + 2) * 3 + input1.calc_input2: | + 2 * 3 diff --git a/lrpar/cttests/src/ctfails/calc_bad_input.test b/lrpar/cttests/src/ctfails/calc_bad_input.test index 22c19ebd2..1a4d3367f 100644 --- a/lrpar/cttests/src/ctfails/calc_bad_input.test +++ b/lrpar/cttests/src/ctfails/calc_bad_input.test @@ -3,7 +3,7 @@ grammar: | %grmtools { yacckind: Original(YaccOriginalActionKind::UserAction), recoverer: RecoveryKind::None, - test_files: ["*.bad_input"] + test_files: ["*.valid_input", "*.bad_input"] } %start Expr %actiontype Result @@ -42,6 +42,7 @@ lexer: | \) ")" [\t\n ]+ ; extra_files: + input1.valid_input: | + (1 + 2) * 3 input1.bad_input: | (1 + 2 * 3 - diff --git a/lrpar/cttests/src/ctfails/test_files1.test b/lrpar/cttests/src/ctfails/test_files1.test new file mode 100644 index 000000000..b20f00698 --- /dev/null +++ b/lrpar/cttests/src/ctfails/test_files1.test @@ -0,0 +1,16 @@ +name: Test string value type instead of array in %grmtools{test_files} +grammar: | + %grmtools { + yacckind: Original(YaccOriginalActionKind::UserAction), + recoverer: RecoveryKind::None, + test_files: "should_be_an_array" + } + %start Expr + %actiontype () + %% + Expr: '(' ')' { () } ; +lexer: | + %% + \( "(" + \) ")" + [\t\n ]+ ; diff --git a/lrpar/cttests/src/ctfails/test_files2.test b/lrpar/cttests/src/ctfails/test_files2.test new file mode 100644 index 000000000..a52a81ab1 --- /dev/null +++ b/lrpar/cttests/src/ctfails/test_files2.test @@ -0,0 +1,16 @@ +name: Test non-string in array of %grmtools{test_files} +grammar: | + %grmtools { + yacckind: Original(YaccOriginalActionKind::UserAction), + recoverer: RecoveryKind::None, + test_files: [ShouldBeAString] + } + %start Expr + %actiontype () + %% + Expr: '(' ')' { () } ; +lexer: | + %% + \( "(" + \) ")" + [\t\n ]+ ; diff --git a/lrpar/cttests/src/ctfails/test_files3.test b/lrpar/cttests/src/ctfails/test_files3.test new file mode 100644 index 000000000..f7f3a79ae --- /dev/null +++ b/lrpar/cttests/src/ctfails/test_files3.test @@ -0,0 +1,16 @@ +name: Test empty matchless glob in array of %grmtools{test_files} +grammar: | + %grmtools { + yacckind: Original(YaccOriginalActionKind::UserAction), + recoverer: RecoveryKind::None, + test_files: ["*.nonexistent"] + } + %start Expr + %actiontype () + %% + Expr: '(' ')' { () } ; +lexer: | + %% + \( "(" + \) ")" + [\t\n ]+ ; From 57717e3975be3984c67223f85b90c6043f291bbd Mon Sep 17 00:00:00 2001 From: matt rice Date: Tue, 21 Oct 2025 14:54:02 -0700 Subject: [PATCH 4/4] Change nimbleparse `test_files` to use an array --- .buildbot.sh | 12 ++++ cfgrammar/src/lib/header.rs | 50 -------------- nimbleparse/src/main.rs | 134 +++++++++++++++++++++--------------- 3 files changed, 90 insertions(+), 106 deletions(-) diff --git a/.buildbot.sh b/.buildbot.sh index b0f48617d..604964149 100644 --- a/.buildbot.sh +++ b/.buildbot.sh @@ -56,23 +56,35 @@ echo "2 + 3 * 4" | cargo run | grep "Result: 14" touch src/main.rs && CACHE_EXPECTED=y cargo build cd $root/lrpar/examples/calc_actions echo "2 + 3 * 4" | cargo run --package nimbleparse -- src/calc.l src/calc.y - +# Invoke `%grmtools{test_files}` +cargo run --package nimbleparse -- src/calc.l src/calc.y echo "2 + 3 * 4" | cargo run | grep "Result: 14" touch src/main.rs && CACHE_EXPECTED=y cargo build cd $root/lrpar/examples/calc_ast echo "2 + 3 * 4" | cargo run --package nimbleparse -- src/calc.l src/calc.y - +# Invoke `%grmtools{test_files}` +cargo run --package nimbleparse -- src/calc.l src/calc.y echo "2 + 3 * 4" | cargo run | grep "Result: 14" cd $root/lrpar/examples/calc_ast_arena echo "2 + 3 * 4" | cargo run --package nimbleparse -- src/calc.l src/calc.y - +# Invoke `%grmtools{test_files}` +cargo run --package nimbleparse -- src/calc.l src/calc.y echo "2 + 3 * 4" | cargo run | grep "Result: 14" touch src/main.rs && CACHE_EXPECTED=y cargo build cd $root/lrpar/examples/calc_parsetree echo "2 + 3 * 4" | cargo run --package nimbleparse -- src/calc.l src/calc.y - +# Invoke `%grmtools{test_files}` +cargo run --package nimbleparse -- src/calc.l src/calc.y echo "2 + 3 * 4" | cargo run | grep "Result: 14" touch src/main.rs && CACHE_EXPECTED=y cargo build cd $root/lrpar/examples/clone_param echo "1+++" | cargo run --package nimbleparse -- src/param.l src/param.y - +# Invoke `%grmtools{test_files}` +cargo run --package nimbleparse -- src/param.l src/param.y cd $root/lrpar/examples/start_states echo "/* /* commented out */ */ uncommented text /* */" | cargo run --package nimbleparse -- src/comment.l src/comment.y - +# Invoke `%grmtools{test_files}` +cargo run --package nimbleparse -- src/comment.l src/comment.y cd $root RUSTDOCFLAGS="-Dwarnings" cargo doc --no-deps diff --git a/cfgrammar/src/lib/header.rs b/cfgrammar/src/lib/header.rs index 21e29fe27..4598c4638 100644 --- a/cfgrammar/src/lib/header.rs +++ b/cfgrammar/src/lib/header.rs @@ -210,56 +210,6 @@ impl From> for Value { } } -impl Value { - pub fn expect_string_with_context(&self, ctxt: &str) -> Result<&str, Box> { - let found = match self { - Value::Flag(_, _) => "bool".to_string(), - Value::Setting(Setting::String(s, _)) => { - return Ok(s); - } - Value::Setting(Setting::Num(_, _)) => "numeric".to_string(), - Value::Setting(Setting::Unitary(Namespaced { - namespace, - member: (member, _), - })) => { - if let Some((ns, _)) = namespace { - format!("'{ns}::{member}'") - } else { - format!("'{member}'") - } - } - Value::Setting(Setting::Array(_, _, _)) => "array".to_string(), - Value::Setting(Setting::Constructor { - ctor: - Namespaced { - namespace: ctor_ns, - member: (ctor_memb, _), - }, - arg: - Namespaced { - namespace: arg_ns, - member: (arg_memb, _), - }, - }) => { - format!( - "'{}({})'", - if let Some((ns, _)) = ctor_ns { - format!("{ns}::{ctor_memb}") - } else { - arg_memb.to_string() - }, - if let Some((ns, _)) = arg_ns { - format!("{ns}::{arg_memb}") - } else { - arg_memb.to_string() - } - ) - } - }; - Err(format!("Expected 'String' value, found {}, at {ctxt}", found).into()) - } -} - static RE_LEADING_WS: LazyLock = LazyLock::new(|| Regex::new(r"^[\p{Pattern_White_Space}]*").unwrap()); static RE_NAME: LazyLock = LazyLock::new(|| { diff --git a/nimbleparse/src/main.rs b/nimbleparse/src/main.rs index ea17c9845..173626b40 100644 --- a/nimbleparse/src/main.rs +++ b/nimbleparse/src/main.rs @@ -1,6 +1,6 @@ use cfgrammar::{ Location, RIdx, Span, TIdx, - header::{GrmtoolsSectionParser, Header, HeaderError, HeaderValue, Value}, + header::{GrmtoolsSectionParser, Header, HeaderError, HeaderValue, Setting, Value}, markmap::Entry, yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind, ast::ASTWithValidityInfo}, }; @@ -495,7 +495,7 @@ impl fmt::Display for NimbleparseError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Source { src_path, errs } => { - writeln!(f, "While parsing: {}", src_path.display())?; + writeln!(f, "While parsing {}:", src_path.display())?; for e in errs { writeln!(f, "{}", e)? } @@ -543,77 +543,99 @@ where } fn parse_many(self, input_paths: &[String]) -> Result<(), NimbleparseError> { - let input_paths = if input_paths.is_empty() { + let mut paths = Vec::new(); + if !input_paths.is_empty() { + paths.extend( + input_paths + .iter() + .map(PathBuf::from) + .collect::>(), + ); + } else { // If given no input paths, try to find some with `test_files` in the header. - if let Some(HeaderValue(_, val)) = self.header.get("test_files") { - let s = val.expect_string_with_context("'test_files' in %grmtools")?; - if let Some(yacc_y_path_dir) = self.yacc_y_path.parent() { - let joined = yacc_y_path_dir.join(s); - let joined = joined.as_os_str().to_str(); - if let Some(s) = joined { - let mut paths = glob::glob(s)?.peekable(); - if paths.peek().is_none() { - return Err(NimbleparseError::Other( - format!("'test_files' glob '{}' matched no paths", s) - .to_string() - .into(), - )); - } - let mut input_paths = Vec::new(); - for path in paths { - let path = path?; - if let Some(ext) = path.extension() { - if let Some(ext) = ext.to_str() { - if ext.starts_with("grm") { - Err(NimbleparseError::Other( - "test_files extensions beginning with `grm` are reserved." - .into(), - ))? + match self.header.get("test_files") { + Some(HeaderValue(_, Value::Setting(Setting::Array(test_globs, _, _)))) => { + for setting in test_globs { + match setting { + Setting::String(s, _) => { + if let Some(yacc_y_path_dir) = self.yacc_y_path.parent() { + let joined = yacc_y_path_dir.join(s); + let joined = joined.as_os_str().to_str(); + if let Some(s) = joined { + let mut glob_paths = glob::glob(s)?.peekable(); + if glob_paths.peek().is_none() { + return Err(NimbleparseError::Other( + format!( + "'test_files' glob '{}' matched no paths", + s + ) + .to_string() + .into(), + )); + } + for path in glob_paths { + let path = path?; + if let Some(ext) = path.extension() { + if let Some(ext) = ext.to_str() { + if ext.starts_with("grm") { + Err(NimbleparseError::Other( + "test_files extensions beginning with `grm` are reserved." + .into(), + ))? + } + } + } + paths.push(path); + } + } else { + return Err(NimbleparseError::Other( + format!( + "Unable to convert joined path to str {} with glob '{}'", + self.yacc_y_path.display(), + s + ) + .into(), + )); } + } else { + return Err(NimbleparseError::Other( + format!( + "Unable to find parent path for {}", + self.yacc_y_path.display() + ) + .into(), + )); } } - input_paths.push(path); + + _ => { + return Err(NimbleparseError::Other( + "Expected string values in `test_files`".into(), + )); + } } - input_paths - } else { - return Err(NimbleparseError::Other( - format!( - "Unable to convert joined path to str {} with glob '{}'", - self.yacc_y_path.display(), - s - ) - .into(), - )); } - } else { + } + Some(_) => { return Err(NimbleparseError::Other( - format!( - "Unable to find parent path for {}", - self.yacc_y_path.display() - ) - .into(), + "Expected Array of string values in `test_files`".into(), + )); + } + None => { + return Err(NimbleparseError::Other( + "Missing argument".into(), )); } - } else { - return Err(NimbleparseError::Other( - "Missing argument".into(), - )); } - } else { - // Just convert the given arguments to paths. - input_paths - .iter() - .map(PathBuf::from) - .collect::>() }; - if input_paths.is_empty() { + if paths.is_empty() { return Err(NimbleparseError::Other( "Missing argument".into(), )); } let pb = RTParserBuilder::new(&self.grm, &self.stable).recoverer(self.recoverykind); // Actually parse the given arguments or the `test_files` specified in the grammar. - for input_path in input_paths { + for input_path in paths { let input = read_file(&input_path); let lexer = self.lexerdef.lexer(&input); let (pt, errs) =