diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 41df9ea4b538..d841fc767b60 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -81,6 +81,14 @@ disas = ["anyhow", "capstone"] # Enables detailed logging which can be somewhat expensive. trace-log = ["regalloc2/trace-log"] +# By default, an ISLE term is compiled into a single Rust function, which can be +# very inefficient for large terms (e.g. `simplify` with hundreds of rules). +# This is because the generated Rust code for such terms is large, and `rustc` takes quadratically longer to compile huge functions. +# This feature splits large match arms in such ISLE terms into closures so that `rustc` compiles them more efficiently. +# However, this can degrade Cranelift compilation times, because it introduces ABI boundaries between the closures. +# Therefore, we recommend enabling this feature only for debugging/development purposes. +isle-split-match = [] + # This enables unwind info generation functionality. unwind = ["gimli"] diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 4d0037963fdf..18cc7b8bf1f6 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -216,6 +216,14 @@ fn run_compilation(compilation: &IsleCompilation) -> Result<(), Errors> { // the generated code to help debug rule matching. options.emit_logging = std::env::var("CARGO_FEATURE_TRACE_LOG").is_ok(); + // Enable optional match-arm splitting in iterator terms for faster compile times. 
+ options.split_match_arms = std::env::var("CARGO_FEATURE_ISLE_SPLIT_MATCH").is_ok(); + if let Ok(value) = std::env::var("ISLE_SPLIT_MATCH_THRESHOLD") { + options.match_arm_split_threshold = Some(value.parse().unwrap_or_else(|err| { + panic!("invalid ISLE_SPLIT_MATCH_THRESHOLD value '{value}': {err}"); + })); + } + if let Ok(out_dir) = std::env::var("OUT_DIR") { options.prefixes.push(isle::codegen::Prefix { prefix: out_dir, diff --git a/cranelift/isle/isle/src/codegen.rs b/cranelift/isle/isle/src/codegen.rs index 21f35b4f21ee..3d1cb1089ac3 100644 --- a/cranelift/isle/isle/src/codegen.rs +++ b/cranelift/isle/isle/src/codegen.rs @@ -11,6 +11,8 @@ use std::fmt::Write; use std::slice::Iter; use std::sync::Arc; +const DEFAULT_MATCH_ARM_BODY_CLOSURE_THRESHOLD: usize = 256; + /// Options for code generation. #[derive(Clone, Debug, Default)] pub struct CodegenOptions { @@ -28,6 +30,17 @@ pub struct CodegenOptions { /// In Cranelift this is typically controlled by a cargo feature on the /// crate that includes the generated code (e.g. `cranelift-codegen`). pub emit_logging: bool, + + /// Split large match arms into local closures when generating iterator terms. + /// + /// In Cranelift this is typically controlled by a cargo feature on the + /// crate that includes the generated code (e.g. `cranelift-codegen`). + pub split_match_arms: bool, + + /// Threshold for splitting match arms into local closures. + /// + /// If `None`, a default threshold is used. + pub match_arm_split_threshold: Option, } /// A path prefix which should be replaced when printing file names. @@ -72,10 +85,29 @@ struct BodyContext<'a, W> { is_bound: StableSet, term_name: &'a str, emit_logging: bool, + split_match_arms: bool, + match_arm_split_threshold: Option, + + // Extra fields for iterator-returning terms. + // These fields are used to generate optimized Rust code for iterator-returning terms. + /// The number of match splits that have been generated. 
+ /// This is used to generate unique names for the match splits. + match_split: usize, + + /// The action to take when the iterator overflows. + iter_overflow_action: &'static str, } impl<'a, W: Write> BodyContext<'a, W> { - fn new(out: &'a mut W, ruleset: &'a RuleSet, term_name: &'a str, emit_logging: bool) -> Self { + fn new( + out: &'a mut W, + ruleset: &'a RuleSet, + term_name: &'a str, + emit_logging: bool, + split_match_arms: bool, + match_arm_split_threshold: Option, + iter_overflow_action: &'static str, + ) -> Self { Self { out, ruleset, @@ -84,6 +116,10 @@ impl<'a, W: Write> BodyContext<'a, W> { is_bound: Default::default(), term_name, emit_logging, + split_match_arms, + match_arm_split_threshold, + match_split: Default::default(), + iter_overflow_action, } } @@ -426,7 +462,19 @@ impl Length for ContextIterWrapper {{ let termdata = &self.termenv.terms[termid.index()]; let term_name = &self.typeenv.syms[termdata.name.index()]; - let mut ctx = BodyContext::new(code, ruleset, term_name, options.emit_logging); + + // Split a match if the term returns an iterator. + let mut ctx = BodyContext::new( + code, + ruleset, + term_name, + options.emit_logging, + options.split_match_arms, + options.match_arm_split_threshold, + "return;", // At top level, we just return. + ); + + // Generate the function signature. writeln!(ctx.out)?; writeln!( ctx.out, @@ -470,6 +518,7 @@ impl Length for ContextIterWrapper {{ ReturnKind::Option => write!(ctx.out, "Option<{ret}>")?, ReturnKind::Plain => write!(ctx.out, "{ret}")?, }; + // Generating the function signature is done. let last_expr = if let Some(EvalStep { check: ControlFlow::Return { .. }, @@ -530,6 +579,21 @@ impl Length for ContextIterWrapper {{ Nested::Cases(block.steps.iter()) } + fn block_weight(block: &Block) -> usize { + fn cf_weight(cf: &ControlFlow) -> usize { + match cf { + ControlFlow::Match { arms, .. } => { + arms.iter().map(|a| Codegen::block_weight(&a.body)).sum() + } + ControlFlow::Equal { body, .. 
} => Codegen::block_weight(body), + ControlFlow::Loop { body, .. } => Codegen::block_weight(body), + ControlFlow::Return { .. } => 0, + } + } + + block.steps.iter().map(|s| 1 + cf_weight(&s.check)).sum() + } + fn emit_block( &self, ctx: &mut BodyContext, @@ -538,8 +602,19 @@ impl Length for ContextIterWrapper {{ last_expr: &str, scope: StableSet, ) -> std::fmt::Result { - let mut stack = Vec::new(); ctx.begin_block()?; + self.emit_block_contents(ctx, block, ret_kind, last_expr, scope) + } + + fn emit_block_contents( + &self, + ctx: &mut BodyContext, + block: &Block, + ret_kind: ReturnKind, + last_expr: &str, + scope: StableSet, + ) -> std::fmt::Result { + let mut stack = Vec::new(); stack.push((Self::validate_block(ret_kind, block), last_expr, scope)); while let Some((mut nested, last_line, scope)) = stack.pop() { @@ -706,8 +781,8 @@ impl Length for ContextIterWrapper {{ writeln!(ctx.out, "));")?; writeln!( ctx.out, - "{}if returns.len() >= MAX_ISLE_RETURNS {{ return; }}", - ctx.indent + "{}if returns.len() >= MAX_ISLE_RETURNS {{ {} }}", + ctx.indent, ctx.iter_overflow_action )?; } } @@ -729,7 +804,42 @@ impl Length for ContextIterWrapper {{ self.emit_constraint(ctx, source, arm)?; write!(ctx.out, " =>")?; ctx.begin_block()?; - stack.push((Self::validate_block(ret_kind, &arm.body), "", scope)); + + // Compile-time optimization: huge function bodies (often from very large match arms + // of constructor bodies) cause rustc to spend a lot of time in analysis passes. + // Wrap such bodies in a local closure to move the bulk of the work into a separate body + // without needing to know the types of captured locals. 
+ let match_arm_body_closure_threshold = ctx + .match_arm_split_threshold + .unwrap_or(DEFAULT_MATCH_ARM_BODY_CLOSURE_THRESHOLD); + if ctx.split_match_arms + && ret_kind == ReturnKind::Iterator + && Codegen::block_weight(&arm.body) > match_arm_body_closure_threshold + { + let closure_id = ctx.match_split; + ctx.match_split += 1; + + write!(ctx.out, "{}if (|| -> bool", &ctx.indent)?; + ctx.begin_block()?; + + let old_overflow_action = ctx.iter_overflow_action; + ctx.iter_overflow_action = "return true;"; + let closure_scope = ctx.enter_scope(); + self.emit_block_contents(ctx, &arm.body, ret_kind, "false", closure_scope)?; + ctx.iter_overflow_action = old_overflow_action; + + // Close `if (|| -> bool { ... })()` and stop the outer function on + // iterator-overflow. + writeln!( + ctx.out, + "{})() {{ {} }} // __isle_arm_{}", + &ctx.indent, ctx.iter_overflow_action, closure_id + )?; + + ctx.end_block("", scope)?; + } else { + stack.push((Self::validate_block(ret_kind, &arm.body), "", scope)); + } } } }