From e5a40854f2201c2a42876dd358744f415822e1e7 Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 15:11:18 -0600 Subject: [PATCH 1/9] add methods to specify memory placement --- crates/cuda_builder/src/lib.rs | 105 +++++++- crates/rustc_codegen_nvvm/src/context.rs | 303 +++++++++++++++-------- 2 files changed, 298 insertions(+), 110 deletions(-) diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index 6441b827..450f3acf 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -56,6 +56,27 @@ pub enum EmitOption { Bitcode, } +/// Specifies which CUDA memory space a static should be placed in. +/// +/// Used with [`CudaBuilder::place_static`] and [`CudaBuilder::crate_memory_space`] +/// to control placement of statics in constant or global memory. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MemorySpace { + /// Global memory (address space 1). Slower but virtually unlimited. + Global, + /// Constant memory (address space 4). Fast, cached, but limited to ~64KB total. + Constant, +} + +impl MemorySpace { + fn as_str(&self) -> &'static str { + match self { + MemorySpace::Global => "global", + MemorySpace::Constant => "constant", + } + } +} + /// A builder for easily compiling Rust GPU crates in build.rs pub struct CudaBuilder { path_to_crate: PathBuf, @@ -174,8 +195,10 @@ pub struct CudaBuilder { pub override_libm: bool, /// If `true`, the codegen will attempt to place `static` variables in CUDA's /// constant memory, which is fast but limited in size (~64KB total across all - /// statics). The codegen avoids placing any single item too large, but it does not - /// track cumulative size. Exceeding the limit may cause `IllegalAddress` runtime + /// statics). The codegen avoids placing any single item too large, + /// it does track cumulative size and will emit a compile-time + /// error if the total exceeds the limit. 
+ /// Exceeding the limit may cause `IllegalAddress` runtime /// errors (CUDA error code: `700`). /// /// The default is `false`, which places all statics in global memory. This avoids @@ -184,9 +207,20 @@ pub struct CudaBuilder { /// `#[cuda_std::address_space(constant)]` to place them in constant memory /// manually. This option only affects automatic placement. /// - /// Future versions may support smarter placement and user-controlled - /// packing/spilling strategies. + /// Use [`place_static`](Self::place_static) and + /// [`crate_memory_space`](Self::crate_memory_space) to override placement for + /// individual statics or entire crates (including third-party crates). pub use_constant_memory_space: bool, + /// Per-static memory placement overrides. Keys are Rust path strings + /// (e.g., `"my_crate::module::MY_STATIC"`). These take priority over per-crate + /// overrides and the global `use_constant_memory_space` flag, but NOT over + /// an explicit `#[cuda_std::address_space(...)]` attribute on the static itself. + pub static_memory_overrides: Vec<(String, MemorySpace)>, + /// Per-crate memory placement overrides. Keys are crate names + /// (e.g., `"ndarray"`). These take priority over the global `use_constant_memory_space` + /// flag, but NOT over per-static overrides or explicit `#[cuda_std::address_space(...)]` + /// attributes. + pub crate_memory_overrides: Vec<(String, MemorySpace)>, /// Whether to generate any debug info and what level of info to generate. pub debug: DebugInfo, /// Additional arguments passed to cargo during `cargo build`. 
@@ -213,6 +247,8 @@ impl CudaBuilder { optix: false, override_libm: true, use_constant_memory_space: false, + static_memory_overrides: vec![], + crate_memory_overrides: vec![], debug: DebugInfo::None, build_args: vec![], final_module_path: None, @@ -328,8 +364,10 @@ impl CudaBuilder { /// If `true`, the codegen will attempt to place `static` variables in CUDA's /// constant memory, which is fast but limited in size (~64KB total across all - /// statics). The codegen avoids placing any single item too large, but it does not - /// track cumulative size. Exceeding the limit may cause `IllegalAddress` runtime + /// statics). The codegen avoids placing any single item too large, + /// it does track cumulative size and will emit a compile-time + /// error if the total exceeds the limit. + /// Exceeding the limit may cause `IllegalAddress` runtime /// errors (CUDA error code: `700`). /// /// If `false`, all statics are placed in global memory. This avoids such errors but @@ -337,13 +375,56 @@ impl CudaBuilder { /// `static` variables with `#[cuda_std::address_space(constant)]` to place them in /// constant memory manually as this option only affects automatic placement. /// - /// Future versions may support smarter placement and user-controlled - /// packing/spilling strategies. + /// Use [`place_static`](Self::place_static) and + /// [`crate_memory_space`](Self::crate_memory_space) to override placement for + /// individual statics or entire crates. pub fn use_constant_memory_space(mut self, use_constant_memory_space: bool) -> Self { self.use_constant_memory_space = use_constant_memory_space; self } + /// Override the memory placement for a specific static by its full Rust path. + /// + /// This takes priority over per-crate overrides and the global + /// `use_constant_memory_space` flag, but NOT over an explicit + /// `#[cuda_std::address_space(...)]` attribute on the static itself. + /// + /// The path is matched against the compiler's `def_path_str` for each static. 
+ /// You can use a full path (e.g., `"my_crate::module::MY_STATIC"`) or a suffix + /// that matches at a `::` boundary (e.g., `"module::MY_STATIC"` or `"MY_STATIC"`). + /// + /// # Example + /// ```no_run + /// # use cuda_builder::{CudaBuilder, MemorySpace}; + /// CudaBuilder::new("my_gpu_crate") + /// .use_constant_memory_space(true) + /// .place_static("dep_crate::BIG_TABLE", MemorySpace::Global) + /// .place_static("my_crate::HOT_DATA", MemorySpace::Constant); + /// ``` + pub fn place_static(mut self, path: &str, space: MemorySpace) -> Self { + self.static_memory_overrides.push((path.to_string(), space)); + self + } + + /// Override the default memory placement for all statics from a given crate. + /// + /// This takes priority over the global `use_constant_memory_space` flag, + /// but NOT over per-static overrides or explicit `#[cuda_std::address_space(...)]` + /// attributes. + /// + /// # Example + /// ```no_run + /// # use cuda_builder::{CudaBuilder, MemorySpace}; + /// CudaBuilder::new("my_gpu_crate") + /// .use_constant_memory_space(true) + /// .crate_memory_space("ndarray", MemorySpace::Global) + /// .crate_memory_space("my_crate", MemorySpace::Constant); + /// ``` + pub fn crate_memory_space(mut self, crate_name: &str, space: MemorySpace) -> Self { + self.crate_memory_overrides.push((crate_name.to_string(), space)); + self + } + /// An optional path where to dump LLVM IR of the final output the codegen will feed to libnvvm. Usually /// used for debugging. 
pub fn final_module_path(mut self, path: impl AsRef) -> Self { @@ -743,6 +824,14 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result { llvm_args.push("--use-constant-memory-space".to_string()); } + for (path, space) in &builder.static_memory_overrides { + llvm_args.push(format!("--static-memory={}={}", path, space.as_str())); + } + + for (crate_name, space) in &builder.crate_memory_overrides { + llvm_args.push(format!("--crate-memory={}={}", crate_name, space.as_str())); + } + if let Some(path) = &builder.final_module_path { llvm_args.push("--final-module-path".to_string()); llvm_args.push(path.to_str().unwrap().to_string()); diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 1a5582df..cf4740bd 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -262,126 +262,177 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { } } +/// Checks if a compiler `def_path` matches a user-provided `pattern`. +/// +/// Supports exact match or suffix match at a `::` boundary to avoid false +/// substring matches. For example, `"my_crate::mod::STATIC"` matches patterns +/// `"my_crate::mod::STATIC"`, `"mod::STATIC"`, and `"STATIC"`, but +/// `"my_crate::mod::MY_STATIC"` does NOT match pattern `"STATIC"` via substring +/// (it would only match `"MY_STATIC"`). +fn path_matches(def_path: &str, pattern: &str) -> bool { + if def_path == pattern { + return true; + } + if def_path.ends_with(pattern) { + let prefix_len = def_path.len() - pattern.len(); + def_path[..prefix_len].ends_with("::") + } else { + false + } +} + impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { + /// Resolves the memory space for a static based on per-static and per-crate overrides. + /// Returns `None` if no override applies (caller falls through to the global flag). 
+ fn resolve_memory_space(&self, instance: Instance<'tcx>) -> Option { + let def_id = instance.def_id(); + let def_path = self.tcx.def_path_str(def_id); + + // Priority 2: Per-static override + for (pattern, space) in &self.codegen_args.static_memory_overrides { + if path_matches(&def_path, pattern) { + return Some(*space); + } + } + + // Priority 3: Per-crate override + let crate_name = self.tcx.crate_name(def_id.krate); + for (name, space) in &self.codegen_args.crate_memory_overrides { + if crate_name.as_str() == name { + return Some(*space); + } + } + + // Priority 4: No override + None + } + /// Computes the address space for a static. + /// + /// Priority system (highest to lowest): + /// 1. Explicit `#[cuda_std::address_space(...)]` attribute + /// 2. Per-static path override via CudaBuilder (`place_static`) + /// 3. Per-crate override via CudaBuilder (`crate_memory_space`) + /// 4. Global `use_constant_memory_space` flag pub fn static_addrspace(&self, instance: Instance<'tcx>) -> AddressSpace { let ty = instance.ty(self.tcx, self.typing_env()); let is_mutable = self.tcx().is_mutable_static(instance.def_id()); let attrs = self.tcx.get_all_attrs(instance.def_id()); // TODO: replace with get_attrs let nvvm_attrs = NvvmAttributes::parse(self, attrs); + // Priority 1: Explicit #[address_space] attribute always wins if let Some(addr) = nvvm_attrs.addrspace { return AddressSpace(addr as u32); } - if !is_mutable && self.type_is_freeze(ty) { - if !self.codegen_args.use_constant_memory_space { - // We aren't using constant memory, so put the instance in global memory. - AddressSpace(1) - } else { - // We are using constant memory, see if the instance will fit. - // - // FIXME(@LegNeato) ideally we keep track of what we have put into - // constant memory and when it is filled up spill instead of only - // spilling when a static is big. 
We'll probably want some packing - // strategy controlled by the user...for example, if you have one large - // static and many small ones, you might want the small ones to all be - // in constant memory or just the big one depending on your workload. - let layout = self.layout_of(ty); - let size_bytes = layout.size.bytes(); - let current_usage = self.constant_memory_usage.get(); - let new_usage = current_usage + size_bytes; - - // Check if this single static is too large for constant memory - if size_bytes > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { - let def_id = instance.def_id(); - let span = self.tcx.def_span(def_id); - let mut diag = self.tcx.sess.dcx().struct_span_warn( - span, - format!( - "static `{instance}` is {size_bytes} bytes, exceeds the constant memory limit of {} bytes", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES - ), - ); - diag.span_label(span, "static exceeds constant memory limit"); - diag.note("placing in global memory (performance may be reduced)"); - diag.help("use `#[cuda_std::address_space(global)]` to explicitly place this static in global memory"); - diag.emit(); - return AddressSpace(1); - } + // Mutable or non-freeze statics cannot go in constant memory + if is_mutable || !self.type_is_freeze(ty) { + return AddressSpace::ZERO; + } - // Check if adding this static would exceed the cumulative limit - if new_usage > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { - let def_id = instance.def_id(); - let span = self.tcx.def_span(def_id); - let mut diag = self.tcx.sess.dcx().struct_span_err( - span, - format!( - "cannot place static `{instance}` ({size_bytes} bytes) in constant memory: \ - cumulative constant memory usage would be {new_usage} bytes, exceeding the {} byte limit", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES - ), - ); - diag.span_label( - span, - format!( - "this static would cause total usage to exceed {} bytes", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES - ), - ); - diag.note(format!( - "current constant memory usage: {current_usage} bytes" - )); - 
diag.note(format!("static size: {size_bytes} bytes")); - diag.note(format!("would result in: {new_usage} bytes total")); - - diag.help("move this or other statics to global memory using `#[cuda_std::address_space(global)]`"); - diag.help("reduce the total size of static data"); - diag.help("disable automatic constant memory placement by setting `.use_constant_memory_space(false)` on `CudaBuilder` in build.rs"); - - diag.emit(); - self.tcx.sess.dcx().abort_if_errors(); - unreachable!() - } + // Resolve memory space from overrides (priorities 2-4) + let want_constant = match self.resolve_memory_space(instance) { + Some(MemorySpace::Constant) => true, + Some(MemorySpace::Global) => false, + None => self.codegen_args.use_constant_memory_space, + }; + + if !want_constant { + return AddressSpace(1); + } + + // Placing in constant memory -- check size constraints + let layout = self.layout_of(ty); + let size_bytes = layout.size.bytes(); + let current_usage = self.constant_memory_usage.get(); + let new_usage = current_usage + size_bytes; + + // Check if this single static is too large for constant memory + if size_bytes > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { + let def_id = instance.def_id(); + let span = self.tcx.def_span(def_id); + let mut diag = self.tcx.sess.dcx().struct_span_warn( + span, + format!( + "static `{instance}` is {size_bytes} bytes, exceeds the constant memory limit of {} bytes", + CONSTANT_MEMORY_SIZE_LIMIT_BYTES + ), + ); + diag.span_label(span, "static exceeds constant memory limit"); + diag.note("placing in global memory (performance may be reduced)"); + diag.help("use `#[cuda_std::address_space(global)]` to explicitly place this static in global memory"); + diag.emit(); + return AddressSpace(1); + } + + // Check if adding this static would exceed the cumulative limit + if new_usage > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { + let def_id = instance.def_id(); + let span = self.tcx.def_span(def_id); + let mut diag = self.tcx.sess.dcx().struct_span_err( + span, + 
format!( + "cannot place static `{instance}` ({size_bytes} bytes) in constant memory: \ + cumulative constant memory usage would be {new_usage} bytes, exceeding the {} byte limit", + CONSTANT_MEMORY_SIZE_LIMIT_BYTES + ), + ); + diag.span_label( + span, + format!( + "this static would cause total usage to exceed {} bytes", + CONSTANT_MEMORY_SIZE_LIMIT_BYTES + ), + ); + diag.note(format!( + "current constant memory usage: {current_usage} bytes" + )); + diag.note(format!("static size: {size_bytes} bytes")); + diag.note(format!("would result in: {new_usage} bytes total")); + + diag.help("move this or other statics to global memory using `#[cuda_std::address_space(global)]` or `.place_static(\"path\", MemorySpace::Global)` in build.rs"); + diag.help("reduce the total size of static data"); + diag.help("disable automatic constant memory placement by setting `.use_constant_memory_space(false)` on `CudaBuilder` in build.rs"); + + diag.emit(); + self.tcx.sess.dcx().abort_if_errors(); + unreachable!() + } // If successfully placed in constant memory: update cumulative usage - self.constant_memory_usage.set(new_usage); + self.constant_memory_usage.set(new_usage); // If approaching the threshold: warns - if new_usage > CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES - && current_usage <= CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES - { - let def_id = instance.def_id(); - let span = self.tcx.def_span(def_id); - let usage_percent = - (new_usage as f64 / CONSTANT_MEMORY_SIZE_LIMIT_BYTES as f64) * 100.0; - let mut diag = self.tcx.sess.dcx().struct_span_warn( - span, - format!( - "constant memory usage is approaching the limit: {new_usage} / {} bytes ({usage_percent:.1}% used)", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES - ), - ); - diag.span_label( - span, - "this placement brought you over 80% of constant memory capacity", - ); - diag.note(format!( - "only {} bytes of constant memory remain", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES - new_usage - )); - diag.help("to prevent constant memory overflow, 
consider moving some statics to global memory using `#[cuda_std::address_space(global)]`"); - diag.emit(); - } - - trace!( - "Placing static `{instance}` ({size_bytes} bytes) in constant memory. Total usage: {new_usage} bytes" - ); - AddressSpace(4) - } - } else { - AddressSpace::ZERO + if new_usage > CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES + && current_usage <= CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES + { + let def_id = instance.def_id(); + let span = self.tcx.def_span(def_id); + let usage_percent = + (new_usage as f64 / CONSTANT_MEMORY_SIZE_LIMIT_BYTES as f64) * 100.0; + let mut diag = self.tcx.sess.dcx().struct_span_warn( + span, + format!( + "constant memory usage is approaching the limit: {new_usage} / {} bytes ({usage_percent:.1}% used)", + CONSTANT_MEMORY_SIZE_LIMIT_BYTES + ), + ); + diag.span_label( + span, + "this placement brought you over 80% of constant memory capacity", + ); + diag.note(format!( + "only {} bytes of constant memory remain", + CONSTANT_MEMORY_SIZE_LIMIT_BYTES - new_usage + )); + diag.help("to prevent constant memory overflow, consider moving some statics to global memory using `#[cuda_std::address_space(global)]` or `.place_static(\"path\", MemorySpace::Global)` in build.rs"); + diag.emit(); } + + trace!( + "Placing static `{instance}` ({size_bytes} bytes) in constant memory. Total usage: {new_usage} bytes" + ); + AddressSpace(4) } /// Declare a global value, returns the existing value if it was already declared. @@ -647,6 +698,26 @@ pub enum DisassembleMode { Globals, } +/// Specifies which CUDA memory space a static should be placed in. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MemorySpace { + /// Global memory (address space 1). + Global, + /// Constant memory (address space 4). 
+ Constant, +} + +impl MemorySpace { + pub fn from_str(s: &str) -> Option { + match s { + "global" => Some(Self::Global), + "constant" => Some(Self::Constant), + _ => None, + } + } + +} + #[derive(Default, Clone)] pub struct CodegenArgs { pub nvvm_options: Vec, @@ -654,6 +725,8 @@ pub struct CodegenArgs { pub use_constant_memory_space: bool, pub final_module_path: Option, pub disassemble: Option, + pub static_memory_overrides: Vec<(String, MemorySpace)>, + pub crate_memory_overrides: Vec<(String, MemorySpace)>, } impl CodegenArgs { @@ -712,6 +785,32 @@ impl CodegenArgs { skip_next = true; } else if let Some(entry) = arg.strip_prefix("--disassemble-entry=") { cg_args.disassemble = Some(DisassembleMode::Entry(entry.to_string())); + } else if let Some(val) = arg.strip_prefix("--static-memory=") { + // Format: "path=global" or "path=constant" + let (path, space_str) = val.rsplit_once('=').unwrap_or_else(|| { + sess.dcx().fatal(format!( + "--static-memory requires format 'path=global|constant', got: {val}" + )) + }); + let space = MemorySpace::from_str(space_str).unwrap_or_else(|| { + sess.dcx().fatal(format!( + "invalid memory space '{space_str}', expected 'global' or 'constant'" + )) + }); + cg_args.static_memory_overrides.push((path.to_string(), space)); + } else if let Some(val) = arg.strip_prefix("--crate-memory=") { + // Format: "crate_name=global" or "crate_name=constant" + let (crate_name, space_str) = val.rsplit_once('=').unwrap_or_else(|| { + sess.dcx().fatal(format!( + "--crate-memory requires format 'crate_name=global|constant', got: {val}" + )) + }); + let space = MemorySpace::from_str(space_str).unwrap_or_else(|| { + sess.dcx().fatal(format!( + "invalid memory space '{space_str}', expected 'global' or 'constant'" + )) + }); + cg_args.crate_memory_overrides.push((crate_name.to_string(), space)); } else { // Do this only after all the other flags above have been tried. 
match NvvmOption::from_str(arg) { From 68a9509448c74141bbcc406fab62f73ae42aaf9c Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 15:56:18 -0600 Subject: [PATCH 2/9] constant memory overspill handling - if constant memory overflows, automatically move that static to global and throw a warning instead of failing - statics are packed in first-come-first-serve order, users can manually override this --- crates/cuda_builder/src/lib.rs | 20 +++++++++++++ crates/rustc_codegen_nvvm/src/context.rs | 30 +++++++------------ .../lang/consts/constant_memory_overflow.rs | 4 +-- .../consts/constant_memory_overflow.stderr | 25 +++++++++------- 4 files changed, 47 insertions(+), 32 deletions(-) diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index 450f3acf..cb703acc 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -201,6 +201,14 @@ pub struct CudaBuilder { /// Exceeding the limit may cause `IllegalAddress` runtime /// errors (CUDA error code: `700`). /// + /// Statics are placed on a first-come-first-served basis in the order the + /// codegen encounters them. When the cumulative size would exceed the 64KB + /// limit, the overflowing static is automatically spilled to global memory + /// with a compile-time warning. Subsequent smaller statics may still fit + /// and will continue to be placed in constant memory. This means the + /// codegen does not optimize for the "best" packing — it simply fills + /// constant memory in encounter order. + /// /// The default is `false`, which places all statics in global memory. This avoids /// such errors but may reduce performance and use more general memory. 
When set to /// `false`, you can still annotate `static` variables with @@ -210,6 +218,8 @@ pub struct CudaBuilder { /// Use [`place_static`](Self::place_static) and /// [`crate_memory_space`](Self::crate_memory_space) to override placement for /// individual statics or entire crates (including third-party crates). + /// These overrides let you prioritize performance-critical statics for + /// constant memory regardless of encounter order. pub use_constant_memory_space: bool, /// Per-static memory placement overrides. Keys are Rust path strings /// (e.g., `"my_crate::module::MY_STATIC"`). These take priority over per-crate @@ -370,6 +380,14 @@ impl CudaBuilder { /// Exceeding the limit may cause `IllegalAddress` runtime /// errors (CUDA error code: `700`). /// + /// Statics are placed on a first-come-first-served basis in the order the + /// codegen encounters them. When the cumulative size would exceed the 64KB + /// limit, the overflowing static is automatically spilled to global memory + /// with a compile-time warning. Subsequent smaller statics may still fit + /// and will continue to be placed in constant memory. This means the + /// codegen does not optimize for the "best" packing — it simply fills + /// constant memory in encounter order. + /// /// If `false`, all statics are placed in global memory. This avoids such errors but /// may reduce performance and use more general memory. You can still annotate /// `static` variables with `#[cuda_std::address_space(constant)]` to place them in @@ -378,6 +396,8 @@ impl CudaBuilder { /// Use [`place_static`](Self::place_static) and /// [`crate_memory_space`](Self::crate_memory_space) to override placement for /// individual statics or entire crates. + /// These overrides let you prioritize performance-critical statics for + /// constant memory regardless of encounter order. 
pub fn use_constant_memory_space(mut self, use_constant_memory_space: bool) -> Self { self.use_constant_memory_space = use_constant_memory_space; self diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index cf4740bd..306ca5b3 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -366,37 +366,27 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { } // Check if adding this static would exceed the cumulative limit + // Auto-spill to global memory with a warning instead of failing if new_usage > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { let def_id = instance.def_id(); let span = self.tcx.def_span(def_id); - let mut diag = self.tcx.sess.dcx().struct_span_err( - span, - format!( - "cannot place static `{instance}` ({size_bytes} bytes) in constant memory: \ - cumulative constant memory usage would be {new_usage} bytes, exceeding the {} byte limit", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES - ), - ); - diag.span_label( + let remaining = CONSTANT_MEMORY_SIZE_LIMIT_BYTES.saturating_sub(current_usage); + let mut diag = self.tcx.sess.dcx().struct_span_warn( span, format!( - "this static would cause total usage to exceed {} bytes", + "constant memory overflow: static `{instance}` ({size_bytes} bytes) does not fit in remaining \ + constant memory ({remaining} bytes free of {} bytes total)", CONSTANT_MEMORY_SIZE_LIMIT_BYTES ), ); + diag.span_label(span, "automatically placed in global memory"); diag.note(format!( - "current constant memory usage: {current_usage} bytes" + "current constant memory usage: {current_usage} / {} bytes", + CONSTANT_MEMORY_SIZE_LIMIT_BYTES )); - diag.note(format!("static size: {size_bytes} bytes")); - diag.note(format!("would result in: {new_usage} bytes total")); - - diag.help("move this or other statics to global memory using `#[cuda_std::address_space(global)]` or `.place_static(\"path\", MemorySpace::Global)` in build.rs"); - diag.help("reduce the total size of static data"); - 
diag.help("disable automatic constant memory placement by setting `.use_constant_memory_space(false)` on `CudaBuilder` in build.rs"); - + diag.help("use `.place_static(\"path\", MemorySpace::Constant)` in build.rs to prioritize specific statics for constant memory"); diag.emit(); - self.tcx.sess.dcx().abort_if_errors(); - unreachable!() + return AddressSpace(1); } // If successfully placed in constant memory: update cumulative usage diff --git a/tests/compiletests/ui/lang/consts/constant_memory_overflow.rs b/tests/compiletests/ui/lang/consts/constant_memory_overflow.rs index c3993952..d6cdce80 100644 --- a/tests/compiletests/ui/lang/consts/constant_memory_overflow.rs +++ b/tests/compiletests/ui/lang/consts/constant_memory_overflow.rs @@ -1,5 +1,5 @@ -// Test that automatic constant memory placement fails when exceeding the 64KB limit -// This test creates multiple large static arrays that together exceed the limit +// Test that automatic constant memory placement spills to global with a warning +// when exceeding the 64KB limit // compile-flags: -Cllvm-args=--use-constant-memory-space diff --git a/tests/compiletests/ui/lang/consts/constant_memory_overflow.stderr b/tests/compiletests/ui/lang/consts/constant_memory_overflow.stderr index 3fd14bf5..881b0db9 100644 --- a/tests/compiletests/ui/lang/consts/constant_memory_overflow.stderr +++ b/tests/compiletests/ui/lang/consts/constant_memory_overflow.stderr @@ -1,15 +1,20 @@ -error: cannot place static `BIG_ARRAY_2` (35840 bytes) in constant memory: cumulative constant memory usage would be 71680 bytes, exceeding the 65536 byte limit +warning: constant memory overflow: static `BIG_ARRAY_2` (35840 bytes) does not fit in remaining constant memory (29696 bytes free of 65536 bytes total) --> $DIR/constant_memory_overflow.rs:15:1 | LL | static BIG_ARRAY_2: [u32; ARRAY_SIZE] = [222u32; ARRAY_SIZE]; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this static would cause total usage to exceed 65536 bytes - | - = note: current constant 
memory usage: 35840 bytes - = note: static size: 35840 bytes - = note: would result in: 71680 bytes total - = help: move this or other statics to global memory using `#[cuda_std::address_space(global)]` - = help: reduce the total size of static data - = help: disable automatic constant memory placement by setting `.use_constant_memory_space(false)` on `CudaBuilder` in build.rs + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ automatically placed in global memory + | + = note: current constant memory usage: 35840 / 65536 bytes + = help: use `.place_static("path", MemorySpace::Constant)` in build.rs to prioritize specific statics for constant memory + +warning: constant memory overflow: static `BIG_ARRAY_3` (35840 bytes) does not fit in remaining constant memory (29696 bytes free of 65536 bytes total) + --> $DIR/constant_memory_overflow.rs:16:1 + | +LL | static BIG_ARRAY_3: [u32; ARRAY_SIZE] = [333u32; ARRAY_SIZE]; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ automatically placed in global memory + | + = note: current constant memory usage: 35840 / 65536 bytes + = help: use `.place_static("path", MemorySpace::Constant)` in build.rs to prioritize specific statics for constant memory -error: aborting due to 1 previous error +warning: 2 warnings emitted From 7a2a6040e0637a1f48039b6d202035778edac418 Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 16:22:10 -0600 Subject: [PATCH 3/9] reserve space in constant memory for statics explicitly placed by the user --- crates/rustc_codegen_nvvm/src/context.rs | 98 +++++++++++++++++++++--- 1 file changed, 87 insertions(+), 11 deletions(-) diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 306ca5b3..8767826c 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -28,7 +28,7 @@ use rustc_middle::ty::layout::{FnAbiOfHelpers, LayoutOfHelpers}; use rustc_middle::ty::{Ty, TypeVisitableExt}; use rustc_middle::{bug, span_bug, ty}; 
use rustc_middle::{ - mir::mono::CodegenUnit, + mir::mono::{CodegenUnit, MonoItem}, ty::{Instance, TyCtxt}, }; use rustc_session::Session; @@ -110,6 +110,9 @@ pub(crate) struct CodegenCx<'ll, 'tcx> { /// Tracks cumulative constant memory usage in bytes for compile-time diagnostics constant_memory_usage: Cell, + /// Pre-reserved constant memory bytes for statics with explicit placement overrides. + /// Computed lazily on first call to `static_addrspace`. + constant_memory_reserved: Cell>, } impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { @@ -181,6 +184,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { codegen_args: CodegenArgs::from_session(tcx.sess()), last_call_llfn: Cell::new(None), constant_memory_usage: Cell::new(0), + constant_memory_reserved: Cell::new(None), }; cx.build_intrinsics_map(); cx @@ -307,6 +311,57 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { None } + /// Computes the total constant memory bytes reserved for statics with explicit + /// placement overrides (`place_static`/`crate_memory_space` requesting constant). + /// This is computed lazily on the first call and cached. + /// + /// By pre-reserving this space, automatic placement only fills whatever remains, + /// ensuring explicitly requested statics always fit (as long as they fit together). 
+ fn get_constant_memory_reserved(&self) -> u64 { + if let Some(reserved) = self.constant_memory_reserved.get() { + return reserved; + } + + let mut reserved: u64 = 0; + for (&item, _) in self.codegen_unit.items() { + if let MonoItem::Static(def_id) = item { + let instance = Instance::mono(self.tcx, def_id); + let ty = instance.ty(self.tcx, self.typing_env()); + + // Skip statics that can't go in constant memory + let is_mutable = self.tcx().is_mutable_static(def_id); + if is_mutable || !self.type_is_freeze(ty) { + continue; + } + + // Skip statics with explicit #[address_space] attributes + let attrs = self.tcx.get_all_attrs(def_id); + let nvvm_attrs = NvvmAttributes::parse(self, attrs); + if nvvm_attrs.addrspace.is_some() { + continue; + } + + // Only reserve for explicit overrides requesting constant memory + if let Some(MemorySpace::Constant) = self.resolve_memory_space(instance) { + let layout = self.layout_of(ty); + reserved += layout.size.bytes(); + } + } + } + + if reserved > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { + self.tcx.sess.dcx().warn(format!( + "explicitly placed statics require {reserved} bytes of constant memory, \ + which exceeds the {} byte limit; this will likely cause runtime errors", + CONSTANT_MEMORY_SIZE_LIMIT_BYTES + )); + } + + self.constant_memory_reserved.set(Some(reserved)); + trace!("Pre-reserved {reserved} bytes of constant memory for explicitly placed statics"); + reserved + } + /// Computes the address space for a static. /// /// Priority system (highest to lowest): @@ -314,6 +369,10 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// 2. Per-static path override via CudaBuilder (`place_static`) /// 3. Per-crate override via CudaBuilder (`crate_memory_space`) /// 4. Global `use_constant_memory_space` flag + /// + /// Statics with explicit overrides (priorities 2-3) requesting constant memory + /// have their space pre-reserved, so automatic placement (priority 4) only fills + /// whatever remains. 
This ensures explicitly placed statics are packed first. pub fn static_addrspace(&self, instance: Instance<'tcx>) -> AddressSpace { let ty = instance.ty(self.tcx, self.typing_env()); let is_mutable = self.tcx().is_mutable_static(instance.def_id()); @@ -331,7 +390,9 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { } // Resolve memory space from overrides (priorities 2-4) - let want_constant = match self.resolve_memory_space(instance) { + let resolved = self.resolve_memory_space(instance); + let explicit_constant = matches!(resolved, Some(MemorySpace::Constant)); + let want_constant = match resolved { Some(MemorySpace::Constant) => true, Some(MemorySpace::Global) => false, None => self.codegen_args.use_constant_memory_space, @@ -347,15 +408,24 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { let current_usage = self.constant_memory_usage.get(); let new_usage = current_usage + size_bytes; + // For automatic placement, the effective limit is reduced by the space + // reserved for explicitly placed statics (so they always fit). + // For explicit overrides, use the full limit. 
+ let effective_limit = if explicit_constant { + CONSTANT_MEMORY_SIZE_LIMIT_BYTES + } else { + let reserved = self.get_constant_memory_reserved(); + CONSTANT_MEMORY_SIZE_LIMIT_BYTES.saturating_sub(reserved) + }; + // Check if this single static is too large for constant memory - if size_bytes > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { + if size_bytes > effective_limit { let def_id = instance.def_id(); let span = self.tcx.def_span(def_id); let mut diag = self.tcx.sess.dcx().struct_span_warn( span, format!( - "static `{instance}` is {size_bytes} bytes, exceeds the constant memory limit of {} bytes", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES + "static `{instance}` is {size_bytes} bytes, exceeds the constant memory limit of {effective_limit} bytes", ), ); diag.span_label(span, "static exceeds constant memory limit"); @@ -367,23 +437,29 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { // Check if adding this static would exceed the cumulative limit // Auto-spill to global memory with a warning instead of failing - if new_usage > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { + if new_usage > effective_limit { let def_id = instance.def_id(); let span = self.tcx.def_span(def_id); - let remaining = CONSTANT_MEMORY_SIZE_LIMIT_BYTES.saturating_sub(current_usage); + let remaining = effective_limit.saturating_sub(current_usage); let mut diag = self.tcx.sess.dcx().struct_span_warn( span, format!( "constant memory overflow: static `{instance}` ({size_bytes} bytes) does not fit in remaining \ - constant memory ({remaining} bytes free of {} bytes total)", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES + constant memory ({remaining} bytes free of {effective_limit} bytes available)", ), ); diag.span_label(span, "automatically placed in global memory"); diag.note(format!( - "current constant memory usage: {current_usage} / {} bytes", - CONSTANT_MEMORY_SIZE_LIMIT_BYTES + "current constant memory usage: {current_usage} / {effective_limit} bytes", )); + if !explicit_constant { + let reserved = self.get_constant_memory_reserved(); 
+ if reserved > 0 { + diag.note(format!( + "{reserved} bytes reserved for explicitly placed statics", + )); + } + } diag.help("use `.place_static(\"path\", MemorySpace::Constant)` in build.rs to prioritize specific statics for constant memory"); diag.emit(); return AddressSpace(1); From ef6f8b5aa09d5cdecfe578753f0baf1992700e21 Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 16:57:45 -0600 Subject: [PATCH 4/9] fix some formats --- crates/cuda_builder/src/lib.rs | 3 ++- crates/rustc_codegen_nvvm/src/context.rs | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index cb703acc..836d2734 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -441,7 +441,8 @@ impl CudaBuilder { /// .crate_memory_space("my_crate", MemorySpace::Constant); /// ``` pub fn crate_memory_space(mut self, crate_name: &str, space: MemorySpace) -> Self { - self.crate_memory_overrides.push((crate_name.to_string(), space)); + self.crate_memory_overrides + .push((crate_name.to_string(), space)); self } diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 8767826c..7612cb2d 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -465,10 +465,10 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { return AddressSpace(1); } - // If successfully placed in constant memory: update cumulative usage + // If successfully placed in constant memory: update cumulative usage self.constant_memory_usage.set(new_usage); - // If approaching the threshold: warns + // If approaching the threshold: warns if new_usage > CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES && current_usage <= CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES { @@ -781,7 +781,6 @@ impl MemorySpace { _ => None, } } - } #[derive(Default, Clone)] @@ -863,7 +862,9 @@ impl CodegenArgs { "invalid memory space '{space_str}', expected 'global' or 
'constant'" )) }); - cg_args.static_memory_overrides.push((path.to_string(), space)); + cg_args + .static_memory_overrides + .push((path.to_string(), space)); } else if let Some(val) = arg.strip_prefix("--crate-memory=") { // Format: "crate_name=global" or "crate_name=constant" let (crate_name, space_str) = val.rsplit_once('=').unwrap_or_else(|| { @@ -876,7 +877,9 @@ impl CodegenArgs { "invalid memory space '{space_str}', expected 'global' or 'constant'" )) }); - cg_args.crate_memory_overrides.push((crate_name.to_string(), space)); + cg_args + .crate_memory_overrides + .push((crate_name.to_string(), space)); } else { // Do this only after all the other flags above have been tried. match NvvmOption::from_str(arg) { From d6674e1a5badc2c38a414201e62a029b1c2f661f Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 17:25:53 -0600 Subject: [PATCH 5/9] fix def_path_str(def_id) bug --- crates/rustc_codegen_nvvm/src/context.rs | 31 +++++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 7612cb2d..3c73e82e 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -25,6 +25,7 @@ use rustc_middle::ty::layout::{ FnAbiError, FnAbiOf, FnAbiRequest, HasTyCtxt, HasTypingEnv, LayoutError, LayoutOf, }; use rustc_middle::ty::layout::{FnAbiOfHelpers, LayoutOfHelpers}; +use rustc_middle::ty::print::with_no_trimmed_paths; use rustc_middle::ty::{Ty, TypeVisitableExt}; use rustc_middle::{bug, span_bug, ty}; use rustc_middle::{ @@ -289,21 +290,33 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// Resolves the memory space for a static based on per-static and per-crate overrides. /// Returns `None` if no override applies (caller falls through to the global flag). 
fn resolve_memory_space(&self, instance: Instance<'tcx>) -> Option { + let has_static_overrides = !self.codegen_args.static_memory_overrides.is_empty(); + let has_crate_overrides = !self.codegen_args.crate_memory_overrides.is_empty(); + + // Early return if no overrides are configured + if !has_static_overrides && !has_crate_overrides { + return None; + } + let def_id = instance.def_id(); - let def_path = self.tcx.def_path_str(def_id); - // Priority 2: Per-static override - for (pattern, space) in &self.codegen_args.static_memory_overrides { - if path_matches(&def_path, pattern) { - return Some(*space); + // Priority 2: Per-static override (only compute def_path if needed) + if has_static_overrides { + let def_path = with_no_trimmed_paths!(self.tcx.def_path_str(def_id)); + for (pattern, space) in &self.codegen_args.static_memory_overrides { + if path_matches(&def_path, pattern) { + return Some(*space); + } } } // Priority 3: Per-crate override - let crate_name = self.tcx.crate_name(def_id.krate); - for (name, space) in &self.codegen_args.crate_memory_overrides { - if crate_name.as_str() == name { - return Some(*space); + if has_crate_overrides { + let crate_name = self.tcx.crate_name(def_id.krate); + for (name, space) in &self.codegen_args.crate_memory_overrides { + if crate_name.as_str() == name { + return Some(*space); + } } } From 27962c27699f7fd870c9e7a0084f1aa0067a9245 Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 17:48:50 -0600 Subject: [PATCH 6/9] change crate_memory_space to place_crate --- crates/cuda_builder/src/lib.rs | 12 ++++++------ crates/rustc_codegen_nvvm/src/context.rs | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs index 836d2734..fff2553e 100644 --- a/crates/cuda_builder/src/lib.rs +++ b/crates/cuda_builder/src/lib.rs @@ -58,7 +58,7 @@ pub enum EmitOption { /// Specifies which CUDA memory space a static should be placed in. 
/// -/// Used with [`CudaBuilder::place_static`] and [`CudaBuilder::crate_memory_space`] +/// Used with [`CudaBuilder::place_static`] and [`CudaBuilder::place_crate`] /// to control placement of statics in constant or global memory. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum MemorySpace { @@ -216,7 +216,7 @@ pub struct CudaBuilder { /// manually. This option only affects automatic placement. /// /// Use [`place_static`](Self::place_static) and - /// [`crate_memory_space`](Self::crate_memory_space) to override placement for + /// [`place_crate`](Self::place_crate) to override placement for /// individual statics or entire crates (including third-party crates). /// These overrides let you prioritize performance-critical statics for /// constant memory regardless of encounter order. @@ -394,7 +394,7 @@ impl CudaBuilder { /// constant memory manually as this option only affects automatic placement. /// /// Use [`place_static`](Self::place_static) and - /// [`crate_memory_space`](Self::crate_memory_space) to override placement for + /// [`place_crate`](Self::place_crate) to override placement for /// individual statics or entire crates. /// These overrides let you prioritize performance-critical statics for /// constant memory regardless of encounter order. 
@@ -437,10 +437,10 @@ impl CudaBuilder { /// # use cuda_builder::{CudaBuilder, MemorySpace}; /// CudaBuilder::new("my_gpu_crate") /// .use_constant_memory_space(true) - /// .crate_memory_space("ndarray", MemorySpace::Global) - /// .crate_memory_space("my_crate", MemorySpace::Constant); + /// .place_crate("ndarray", MemorySpace::Global) + /// .place_crate("my_crate", MemorySpace::Constant); /// ``` - pub fn crate_memory_space(mut self, crate_name: &str, space: MemorySpace) -> Self { + pub fn place_crate(mut self, crate_name: &str, space: MemorySpace) -> Self { self.crate_memory_overrides .push((crate_name.to_string(), space)); self diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 3c73e82e..6ec4583c 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -325,7 +325,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { } /// Computes the total constant memory bytes reserved for statics with explicit - /// placement overrides (`place_static`/`crate_memory_space` requesting constant). + /// placement overrides (`place_static`/`place_crate` requesting constant). /// This is computed lazily on the first call and cached. /// /// By pre-reserving this space, automatic placement only fills whatever remains, @@ -380,7 +380,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// Priority system (highest to lowest): /// 1. Explicit `#[cuda_std::address_space(...)]` attribute /// 2. Per-static path override via CudaBuilder (`place_static`) - /// 3. Per-crate override via CudaBuilder (`crate_memory_space`) + /// 3. Per-crate override via CudaBuilder (`place_crate`) /// 4. 
Global `use_constant_memory_space` flag /// /// Statics with explicit overrides (priorities 2-3) requesting constant memory From ab281b6c3dbffe164404236dcd0dce09f9c268c0 Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 17:52:18 -0600 Subject: [PATCH 7/9] fix a bug that double counts a variable --- crates/rustc_codegen_nvvm/src/context.rs | 19 +++++++++++++++++++ .../consts/constant_memory_overflow.stderr | 4 ++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 6ec4583c..93f856de 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -114,6 +114,10 @@ pub(crate) struct CodegenCx<'ll, 'tcx> { /// Pre-reserved constant memory bytes for statics with explicit placement overrides. /// Computed lazily on first call to `static_addrspace`. constant_memory_reserved: Cell>, + /// Cache of address space decisions per static instance, to prevent + /// double-counting when `static_addrspace` is called multiple times + /// (e.g., during both predefine and RAUW phases). + static_addrspace_cache: RefCell, AddressSpace>>, } impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { @@ -186,6 +190,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { last_call_llfn: Cell::new(None), constant_memory_usage: Cell::new(0), constant_memory_reserved: Cell::new(None), + static_addrspace_cache: Default::default(), }; cx.build_intrinsics_map(); cx @@ -387,6 +392,20 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// have their space pre-reserved, so automatic placement (priority 4) only fills /// whatever remains. This ensures explicitly placed statics are packed first. pub fn static_addrspace(&self, instance: Instance<'tcx>) -> AddressSpace { + // Return cached result to prevent double-counting constant memory usage + // when called from multiple phases (predefine, define/RAUW). 
+ if let Some(&cached) = self.static_addrspace_cache.borrow().get(&instance) { + return cached; + } + + let result = self.compute_static_addrspace(instance); + self.static_addrspace_cache + .borrow_mut() + .insert(instance, result); + result + } + + fn compute_static_addrspace(&self, instance: Instance<'tcx>) -> AddressSpace { let ty = instance.ty(self.tcx, self.typing_env()); let is_mutable = self.tcx().is_mutable_static(instance.def_id()); let attrs = self.tcx.get_all_attrs(instance.def_id()); // TODO: replace with get_attrs diff --git a/tests/compiletests/ui/lang/consts/constant_memory_overflow.stderr b/tests/compiletests/ui/lang/consts/constant_memory_overflow.stderr index 881b0db9..0d84c98f 100644 --- a/tests/compiletests/ui/lang/consts/constant_memory_overflow.stderr +++ b/tests/compiletests/ui/lang/consts/constant_memory_overflow.stderr @@ -1,4 +1,4 @@ -warning: constant memory overflow: static `BIG_ARRAY_2` (35840 bytes) does not fit in remaining constant memory (29696 bytes free of 65536 bytes total) +warning: constant memory overflow: static `BIG_ARRAY_2` (35840 bytes) does not fit in remaining constant memory (29696 bytes free of 65536 bytes available) --> $DIR/constant_memory_overflow.rs:15:1 | LL | static BIG_ARRAY_2: [u32; ARRAY_SIZE] = [222u32; ARRAY_SIZE]; @@ -7,7 +7,7 @@ LL | static BIG_ARRAY_2: [u32; ARRAY_SIZE] = [222u32; ARRAY_SIZE]; = note: current constant memory usage: 35840 / 65536 bytes = help: use `.place_static("path", MemorySpace::Constant)` in build.rs to prioritize specific statics for constant memory -warning: constant memory overflow: static `BIG_ARRAY_3` (35840 bytes) does not fit in remaining constant memory (29696 bytes free of 65536 bytes total) +warning: constant memory overflow: static `BIG_ARRAY_3` (35840 bytes) does not fit in remaining constant memory (29696 bytes free of 65536 bytes available) --> $DIR/constant_memory_overflow.rs:16:1 | LL | static BIG_ARRAY_3: [u32; ARRAY_SIZE] = [333u32; ARRAY_SIZE]; From 
0b3ad86ddfc19d5de84df58106420e5ad3c10e6d Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 18:21:07 -0600 Subject: [PATCH 8/9] fix another double counting bug + change global address to 1 --- crates/rustc_codegen_nvvm/src/consts.rs | 2 -- crates/rustc_codegen_nvvm/src/context.rs | 22 ++++++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/crates/rustc_codegen_nvvm/src/consts.rs b/crates/rustc_codegen_nvvm/src/consts.rs index 1d904f68..8e2e1a9b 100644 --- a/crates/rustc_codegen_nvvm/src/consts.rs +++ b/crates/rustc_codegen_nvvm/src/consts.rs @@ -415,8 +415,6 @@ impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> { // llvm::LLVMSetGlobalConstant(g, llvm::True); } - debug_info::build_global_var_di_node(self, def_id, g); - if attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) { self.unsupported("thread locals"); } diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 93f856de..37cdda31 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -114,6 +114,11 @@ pub(crate) struct CodegenCx<'ll, 'tcx> { /// Pre-reserved constant memory bytes for statics with explicit placement overrides. /// Computed lazily on first call to `static_addrspace`. constant_memory_reserved: Cell>, + /// Tracks how many reserved bytes have already been placed (and thus already + /// counted in `constant_memory_usage`). This prevents double-counting: without it, + /// the effective limit for automatic statics subtracts the full reservation even + /// though some of that reserved space is already reflected in `constant_memory_usage`. + explicit_constant_memory_placed: Cell, /// Cache of address space decisions per static instance, to prevent /// double-counting when `static_addrspace` is called multiple times /// (e.g., during both predefine and RAUW phases). 
@@ -190,6 +195,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { last_call_llfn: Cell::new(None), constant_memory_usage: Cell::new(0), constant_memory_reserved: Cell::new(None), + explicit_constant_memory_placed: Cell::new(0), static_addrspace_cache: Default::default(), }; cx.build_intrinsics_map(); @@ -418,7 +424,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { // Mutable or non-freeze statics cannot go in constant memory if is_mutable || !self.type_is_freeze(ty) { - return AddressSpace::ZERO; + return AddressSpace(1); } // Resolve memory space from overrides (priorities 2-4) @@ -440,14 +446,18 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { let current_usage = self.constant_memory_usage.get(); let new_usage = current_usage + size_bytes; - // For automatic placement, the effective limit is reduced by the space - // reserved for explicitly placed statics (so they always fit). + // For automatic placement, the effective limit is reduced by the reserved + // space that has NOT yet been placed. Reserved space that has already been + // placed is already reflected in `constant_memory_usage`, so subtracting + // the full reservation would double-count it. // For explicit overrides, use the full limit. 
let effective_limit = if explicit_constant { CONSTANT_MEMORY_SIZE_LIMIT_BYTES } else { let reserved = self.get_constant_memory_reserved(); - CONSTANT_MEMORY_SIZE_LIMIT_BYTES.saturating_sub(reserved) + let already_placed = self.explicit_constant_memory_placed.get(); + let remaining_reservation = reserved.saturating_sub(already_placed); + CONSTANT_MEMORY_SIZE_LIMIT_BYTES.saturating_sub(remaining_reservation) }; // Check if this single static is too large for constant memory @@ -499,6 +509,10 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { // If successfully placed in constant memory: update cumulative usage self.constant_memory_usage.set(new_usage); + if explicit_constant { + self.explicit_constant_memory_placed + .set(self.explicit_constant_memory_placed.get() + size_bytes); + } // If approaching the threshold: warns if new_usage > CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES From 00378ad8ff14ca089058556650425d9e18dc1fc6 Mon Sep 17 00:00:00 2001 From: Quang Pham Date: Thu, 19 Feb 2026 22:20:16 -0600 Subject: [PATCH 9/9] fix bug where statics with explicit address space are not counted --- crates/rustc_codegen_nvvm/src/context.rs | 38 ++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 37cdda31..5f097b03 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -358,10 +358,16 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { continue; } - // Skip statics with explicit #[address_space] attributes + // Handle statics with explicit #[address_space] attributes: + // count #[address_space(constant)] toward the reservation (they + // consume constant memory), skip all others. 
let attrs = self.tcx.get_all_attrs(def_id); let nvvm_attrs = NvvmAttributes::parse(self, attrs); - if nvvm_attrs.addrspace.is_some() { + if let Some(addr) = nvvm_attrs.addrspace { + if addr == 4 { + let layout = self.layout_of(ty); + reserved += layout.size.bytes(); + } continue; } @@ -419,6 +425,34 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { // Priority 1: Explicit #[address_space] attribute always wins if let Some(addr) = nvvm_attrs.addrspace { + // Track constant memory usage for #[address_space(constant)] so we + // don't silently exceed the 64KB hardware limit. + if addr == 4 { + let layout = self.layout_of(ty); + let size_bytes = layout.size.bytes(); + let current_usage = self.constant_memory_usage.get(); + let new_usage = current_usage + size_bytes; + self.constant_memory_usage.set(new_usage); + self.explicit_constant_memory_placed + .set(self.explicit_constant_memory_placed.get() + size_bytes); + + if new_usage > CONSTANT_MEMORY_SIZE_LIMIT_BYTES { + let def_id = instance.def_id(); + let span = self.tcx.def_span(def_id); + let mut diag = self.tcx.sess.dcx().struct_span_warn( + span, + format!( + "constant memory overflow: static `{instance}` ({size_bytes} bytes) \ + causes total constant memory usage ({new_usage} bytes) to exceed \ + the {CONSTANT_MEMORY_SIZE_LIMIT_BYTES} byte hardware limit", + ), + ); + diag.span_label(span, "placed via explicit #[address_space(constant)]"); + diag.note("this will likely cause runtime errors"); + diag.help("consider moving some statics to global memory"); + diag.emit(); + } + } return AddressSpace(addr as u32); }