From a7db47cc9064b4750b3e4125d270f127e008efc3 Mon Sep 17 00:00:00 2001 From: Bongjun Date: Fri, 9 Jan 2026 05:23:46 +0000 Subject: [PATCH 1/3] optimize isle codegen for rustc --- cranelift/isle/isle/src/codegen.rs | 98 ++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 6 deletions(-) diff --git a/cranelift/isle/isle/src/codegen.rs b/cranelift/isle/isle/src/codegen.rs index 21f35b4f21ee..9840bed6b0ac 100644 --- a/cranelift/isle/isle/src/codegen.rs +++ b/cranelift/isle/isle/src/codegen.rs @@ -72,10 +72,25 @@ struct BodyContext<'a, W> { is_bound: StableSet, term_name: &'a str, emit_logging: bool, + + // Extra fields for iterator-returning terms. + // These fields are used to generate optimized Rust code for iterator-returning terms. + /// The number of match splits that have been generated. + /// This is used to generate unique names for the match splits. + match_split: usize, + + /// The action to take when the iterator overflows. + iter_overflow_action: &'static str, } impl<'a, W: Write> BodyContext<'a, W> { - fn new(out: &'a mut W, ruleset: &'a RuleSet, term_name: &'a str, emit_logging: bool) -> Self { + fn new( + out: &'a mut W, + ruleset: &'a RuleSet, + term_name: &'a str, + emit_logging: bool, + iter_overflow_action: &'static str, + ) -> Self { Self { out, ruleset, @@ -84,6 +99,8 @@ impl<'a, W: Write> BodyContext<'a, W> { is_bound: Default::default(), term_name, emit_logging, + match_split: Default::default(), + iter_overflow_action, } } @@ -426,7 +443,17 @@ impl Length for ContextIterWrapper {{ let termdata = &self.termenv.terms[termid.index()]; let term_name = &self.typeenv.syms[termdata.name.index()]; - let mut ctx = BodyContext::new(code, ruleset, term_name, options.emit_logging); + + // Split a match if the term returns an iterator. + let mut ctx = BodyContext::new( + code, + ruleset, + term_name, + options.emit_logging, + "return;", // At top level, we just return. + ); + + // Generate the function signature. 
writeln!(ctx.out)?; writeln!( ctx.out, @@ -470,6 +497,7 @@ impl Length for ContextIterWrapper {{ ReturnKind::Option => write!(ctx.out, "Option<{ret}>")?, ReturnKind::Plain => write!(ctx.out, "{ret}")?, }; + // Generating the function signature is done. let last_expr = if let Some(EvalStep { check: ControlFlow::Return { .. }, @@ -530,6 +558,21 @@ impl Length for ContextIterWrapper {{ Nested::Cases(block.steps.iter()) } + fn block_weight(block: &Block) -> usize { + fn cf_weight(cf: &ControlFlow) -> usize { + match cf { + ControlFlow::Match { arms, .. } => { + arms.iter().map(|a| Codegen::block_weight(&a.body)).sum() + } + ControlFlow::Equal { body, .. } => Codegen::block_weight(body), + ControlFlow::Loop { body, .. } => Codegen::block_weight(body), + ControlFlow::Return { .. } => 0, + } + } + + block.steps.iter().map(|s| 1 + cf_weight(&s.check)).sum() + } + fn emit_block( &self, ctx: &mut BodyContext, @@ -538,8 +581,19 @@ impl Length for ContextIterWrapper {{ last_expr: &str, scope: StableSet, ) -> std::fmt::Result { - let mut stack = Vec::new(); ctx.begin_block()?; + self.emit_block_contents(ctx, block, ret_kind, last_expr, scope) + } + + fn emit_block_contents( + &self, + ctx: &mut BodyContext, + block: &Block, + ret_kind: ReturnKind, + last_expr: &str, + scope: StableSet, + ) -> std::fmt::Result { + let mut stack = Vec::new(); stack.push((Self::validate_block(ret_kind, block), last_expr, scope)); while let Some((mut nested, last_line, scope)) = stack.pop() { @@ -706,8 +760,8 @@ impl Length for ContextIterWrapper {{ writeln!(ctx.out, "));")?; writeln!( ctx.out, - "{}if returns.len() >= MAX_ISLE_RETURNS {{ return; }}", - ctx.indent + "{}if returns.len() >= MAX_ISLE_RETURNS {{ {} }}", + ctx.indent, ctx.iter_overflow_action )?; } } @@ -729,7 +783,39 @@ impl Length for ContextIterWrapper {{ self.emit_constraint(ctx, source, arm)?; write!(ctx.out, " =>")?; ctx.begin_block()?; - stack.push((Self::validate_block(ret_kind, &arm.body), "", scope)); + + // Compile-time 
optimization: huge function bodies (often from very large match arms + // of constructor bodies)cause rustc to spend a lot of time in analysis passes. + // Wrap such bodies in a local closure to move the bulk of the work into a separate body + // without needing to know the types of captured locals. + const MATCH_ARM_BODY_CLOSURE_THRESHOLD: usize = 256; + if ret_kind == ReturnKind::Iterator + && Codegen::block_weight(&arm.body) > MATCH_ARM_BODY_CLOSURE_THRESHOLD + { + let closure_id = ctx.match_split; + ctx.match_split += 1; + + write!(ctx.out, "{}if (|| -> bool", &ctx.indent)?; + ctx.begin_block()?; + + let old_overflow_action = ctx.iter_overflow_action; + ctx.iter_overflow_action = "return true;"; + let closure_scope = ctx.enter_scope(); + self.emit_block_contents(ctx, &arm.body, ret_kind, "false", closure_scope)?; + ctx.iter_overflow_action = old_overflow_action; + + // Close `if (|| -> bool { ... })()` and stop the outer function on + // iterator-overflow. + writeln!( + ctx.out, + "{})() {{ {} }} // __isle_arm_{}", + &ctx.indent, ctx.iter_overflow_action, closure_id + )?; + + ctx.end_block("", scope)?; + } else { + stack.push((Self::validate_block(ret_kind, &arm.body), "", scope)); + } } } } From 0b45938e4a0a70e34604f7d73c495dba26804e93 Mon Sep 17 00:00:00 2001 From: Bongjun Date: Tue, 27 Jan 2026 04:15:12 +0000 Subject: [PATCH 2/3] opt-in for optimizing isle codegen --- cranelift/codegen/Cargo.toml | 8 ++++++++ cranelift/codegen/build.rs | 3 +++ cranelift/isle/isle/src/codegen.rs | 13 ++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 41df9ea4b538..d841fc767b60 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -81,6 +81,14 @@ disas = ["anyhow", "capstone"] # Enables detailed logging which can be somewhat expensive. 
trace-log = ["regalloc2/trace-log"] +# By default, an ISLE term is compiled into a single Rust function, but it can be +# significantly inefficient for large terms (e.g. `simplify` with hundreds of rules). +# This is because the generated Rust code for such terms is large, and `rustc` takes quadratically longer to compile huge functions. +# This feature splits large match arms in such ISLE terms into closures so that `rustc` compiles them more efficiently. +# However, this can degrade Cranelift compilation times by introducing ABI boundaries between the closures. +# Therefore, we recommend enabling this feature only for debugging/development purposes. +isle-split-match = [] + # This enables unwind info generation functionality. unwind = ["gimli"] diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 4d0037963fdf..9303d40cdd52 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -216,6 +216,9 @@ fn run_compilation(compilation: &IsleCompilation) -> Result<(), Errors> { // the generated code to help debug rule matching. options.emit_logging = std::env::var("CARGO_FEATURE_TRACE_LOG").is_ok(); + // Enable optional match-arm splitting in iterator terms for faster compile times. + options.split_match_arms = std::env::var("CARGO_FEATURE_ISLE_SPLIT_MATCH").is_ok(); + if let Ok(out_dir) = std::env::var("OUT_DIR") { options.prefixes.push(isle::codegen::Prefix { prefix: out_dir, diff --git a/cranelift/isle/isle/src/codegen.rs b/cranelift/isle/isle/src/codegen.rs index 9840bed6b0ac..579f150a7941 100644 --- a/cranelift/isle/isle/src/codegen.rs +++ b/cranelift/isle/isle/src/codegen.rs @@ -28,6 +28,12 @@ pub struct CodegenOptions { /// In Cranelift this is typically controlled by a cargo feature on the /// crate that includes the generated code (e.g. `cranelift-codegen`). pub emit_logging: bool, + + /// Split large match arms into local closures when generating iterator terms.
+ /// + /// In Cranelift this is typically controlled by a cargo feature on the + /// crate that includes the generated code (e.g. `cranelift-codegen`). + pub split_match_arms: bool, } /// A path prefix which should be replaced when printing file names. @@ -72,6 +78,7 @@ struct BodyContext<'a, W> { is_bound: StableSet, term_name: &'a str, emit_logging: bool, + split_match_arms: bool, // Extra fields for iterator-returning terms. // These fields are used to generate optimized Rust code for iterator-returning terms. @@ -89,6 +96,7 @@ impl<'a, W: Write> BodyContext<'a, W> { ruleset: &'a RuleSet, term_name: &'a str, emit_logging: bool, + split_match_arms: bool, iter_overflow_action: &'static str, ) -> Self { Self { @@ -99,6 +107,7 @@ impl<'a, W: Write> BodyContext<'a, W> { is_bound: Default::default(), term_name, emit_logging, + split_match_arms, match_split: Default::default(), iter_overflow_action, } @@ -450,6 +459,7 @@ impl Length for ContextIterWrapper {{ ruleset, term_name, options.emit_logging, + options.split_match_arms, "return;", // At top level, we just return. ); @@ -789,7 +799,8 @@ impl Length for ContextIterWrapper {{ // Wrap such bodies in a local closure to move the bulk of the work into a separate body // without needing to know the types of captured locals. 
const MATCH_ARM_BODY_CLOSURE_THRESHOLD: usize = 256; - if ret_kind == ReturnKind::Iterator + if ctx.split_match_arms + && ret_kind == ReturnKind::Iterator && Codegen::block_weight(&arm.body) > MATCH_ARM_BODY_CLOSURE_THRESHOLD { let closure_id = ctx.match_split; From 19d3f56b9995a3e64d41e7aa65654b9889474b1d Mon Sep 17 00:00:00 2001 From: Bongjun Date: Tue, 27 Jan 2026 04:20:45 +0000 Subject: [PATCH 3/3] have a controller threshold --- cranelift/codegen/build.rs | 5 +++++ cranelift/isle/isle/src/codegen.rs | 17 +++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 9303d40cdd52..18cc7b8bf1f6 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -218,6 +218,11 @@ fn run_compilation(compilation: &IsleCompilation) -> Result<(), Errors> { // Enable optional match-arm splitting in iterator terms for faster compile times. options.split_match_arms = std::env::var("CARGO_FEATURE_ISLE_SPLIT_MATCH").is_ok(); + if let Ok(value) = std::env::var("ISLE_SPLIT_MATCH_THRESHOLD") { + options.match_arm_split_threshold = Some(value.parse().unwrap_or_else(|err| { + panic!("invalid ISLE_SPLIT_MATCH_THRESHOLD value '{value}': {err}"); + })); + } if let Ok(out_dir) = std::env::var("OUT_DIR") { options.prefixes.push(isle::codegen::Prefix { diff --git a/cranelift/isle/isle/src/codegen.rs b/cranelift/isle/isle/src/codegen.rs index 579f150a7941..3d1cb1089ac3 100644 --- a/cranelift/isle/isle/src/codegen.rs +++ b/cranelift/isle/isle/src/codegen.rs @@ -11,6 +11,8 @@ use std::fmt::Write; use std::slice::Iter; use std::sync::Arc; +const DEFAULT_MATCH_ARM_BODY_CLOSURE_THRESHOLD: usize = 256; + /// Options for code generation. #[derive(Clone, Debug, Default)] pub struct CodegenOptions { @@ -34,6 +36,11 @@ pub struct CodegenOptions { /// In Cranelift this is typically controlled by a cargo feature on the /// crate that includes the generated code (e.g. `cranelift-codegen`). 
pub split_match_arms: bool, + + /// Threshold for splitting match arms into local closures. + /// + /// If `None`, a default threshold is used. + pub match_arm_split_threshold: Option, } /// A path prefix which should be replaced when printing file names. @@ -79,6 +86,7 @@ struct BodyContext<'a, W> { term_name: &'a str, emit_logging: bool, split_match_arms: bool, + match_arm_split_threshold: Option, // Extra fields for iterator-returning terms. // These fields are used to generate optimized Rust code for iterator-returning terms. @@ -97,6 +105,7 @@ impl<'a, W: Write> BodyContext<'a, W> { term_name: &'a str, emit_logging: bool, split_match_arms: bool, + match_arm_split_threshold: Option, iter_overflow_action: &'static str, ) -> Self { Self { @@ -108,6 +117,7 @@ impl<'a, W: Write> BodyContext<'a, W> { term_name, emit_logging, split_match_arms, + match_arm_split_threshold, match_split: Default::default(), iter_overflow_action, } @@ -460,6 +470,7 @@ impl Length for ContextIterWrapper {{ term_name, options.emit_logging, options.split_match_arms, + options.match_arm_split_threshold, "return;", // At top level, we just return. ); @@ -798,10 +809,12 @@ impl Length for ContextIterWrapper {{ // of constructor bodies)cause rustc to spend a lot of time in analysis passes. // Wrap such bodies in a local closure to move the bulk of the work into a separate body // without needing to know the types of captured locals. - const MATCH_ARM_BODY_CLOSURE_THRESHOLD: usize = 256; + let match_arm_body_closure_threshold = ctx + .match_arm_split_threshold + .unwrap_or(DEFAULT_MATCH_ARM_BODY_CLOSURE_THRESHOLD); if ctx.split_match_arms && ret_kind == ReturnKind::Iterator - && Codegen::block_weight(&arm.body) > MATCH_ARM_BODY_CLOSURE_THRESHOLD + && Codegen::block_weight(&arm.body) > match_arm_body_closure_threshold { let closure_id = ctx.match_split; ctx.match_split += 1;