From 3337c4a49f431af73d0e295be82256e3be1603f5 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Mon, 24 Nov 2025 16:37:17 +1100
Subject: [PATCH 1/3] Update `NvvmArch`.

Remove and add some `NvvmArch` variants.
- Remove `compute_35` and `compute_37`, which are no longer
  needed/supported now that CUDA 11.x support is gone (#312).
- Add `compute_73`, which is supported in CUDA 12.0-12.8 but was never
  added.
- Add `compute_88` and `compute_110{,f,a}`, which are new in CUDA 13.0.
---
 crates/nvvm/src/lib.rs                  | 141 ++++++++++++++----------
 guide/src/guide/compute_capabilities.md |   2 +-
 2 files changed, 83 insertions(+), 60 deletions(-)

diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs
index c89aab93..cb19c6e7 100644
--- a/crates/nvvm/src/lib.rs
+++ b/crates/nvvm/src/lib.rs
@@ -300,8 +300,6 @@ impl FromStr for NvvmOption {
 /// ```
 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, strum::EnumIter)]
 pub enum NvvmArch {
-    Compute35,
-    Compute37,
     Compute50,
     Compute52,
     Compute53,
@@ -310,6 +308,7 @@ pub enum NvvmArch {
     Compute62,
     Compute70,
     Compute72,
+    Compute73,
     /// This default value of 7.5 corresponds to Turing and later devices. We default to this
     /// because it is the minimum supported by CUDA 13.0 while being in the middle of the range
     /// supported by CUDA 12.x.
@@ -320,6 +319,7 @@ pub enum NvvmArch {
     Compute80,
     Compute86,
     Compute87,
+    Compute88,
     Compute89,
     Compute90,
     Compute90a,
@@ -332,6 +332,9 @@ pub enum NvvmArch {
     Compute103,
     Compute103f,
     Compute103a,
+    Compute110,
+    Compute110f,
+    Compute110a,
     Compute120,
     Compute120f,
     Compute120a,
@@ -351,8 +354,6 @@ impl FromStr for NvvmArch {
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         Ok(match s {
-            "compute_35" => NvvmArch::Compute35,
-            "compute_37" => NvvmArch::Compute37,
             "compute_50" => NvvmArch::Compute50,
             "compute_52" => NvvmArch::Compute52,
             "compute_53" => NvvmArch::Compute53,
@@ -361,10 +362,12 @@ impl FromStr for NvvmArch {
             "compute_62" => NvvmArch::Compute62,
             "compute_70" => NvvmArch::Compute70,
             "compute_72" => NvvmArch::Compute72,
+            "compute_73" => NvvmArch::Compute73,
             "compute_75" => NvvmArch::Compute75,
             "compute_80" => NvvmArch::Compute80,
             "compute_86" => NvvmArch::Compute86,
             "compute_87" => NvvmArch::Compute87,
+            "compute_88" => NvvmArch::Compute88,
             "compute_89" => NvvmArch::Compute89,
             "compute_90" => NvvmArch::Compute90,
             "compute_90a" => NvvmArch::Compute90a,
@@ -377,6 +380,9 @@ impl FromStr for NvvmArch {
             "compute_103" => NvvmArch::Compute103,
             "compute_103f" => NvvmArch::Compute103f,
             "compute_103a" => NvvmArch::Compute103a,
+            "compute_110" => NvvmArch::Compute110,
+            "compute_110f" => NvvmArch::Compute110f,
+            "compute_110a" => NvvmArch::Compute110a,
             "compute_120" => NvvmArch::Compute120,
             "compute_120f" => NvvmArch::Compute120f,
             "compute_120a" => NvvmArch::Compute120a,
@@ -389,11 +395,9 @@ impl FromStr for NvvmArch {
 }
 
 impl NvvmArch {
-    /// Get the numeric capability value (e.g., 35 for Compute35)
+    /// Get the numeric capability value (e.g., 90 for `Compute90` or `Compute90a`).
     pub fn capability_value(&self) -> u32 {
         match self {
-            Self::Compute35 => 35,
-            Self::Compute37 => 37,
             Self::Compute50 => 50,
             Self::Compute52 => 52,
             Self::Compute53 => 53,
@@ -402,10 +406,12 @@ impl NvvmArch {
             Self::Compute62 => 62,
             Self::Compute70 => 70,
             Self::Compute72 => 72,
+            Self::Compute73 => 73,
             Self::Compute75 => 75,
             Self::Compute80 => 80,
             Self::Compute86 => 86,
             Self::Compute87 => 87,
+            Self::Compute88 => 88,
             Self::Compute89 => 89,
             Self::Compute90 => 90,
             Self::Compute90a => 90,
@@ -418,6 +424,9 @@ impl NvvmArch {
             Self::Compute103 => 103,
             Self::Compute103f => 103,
             Self::Compute103a => 103,
+            Self::Compute110 => 110,
+            Self::Compute110f => 110,
+            Self::Compute110a => 110,
             Self::Compute120 => 120,
             Self::Compute120f => 120,
             Self::Compute120a => 120,
@@ -437,11 +446,10 @@ impl NvvmArch {
         self.capability_value() % 10
     }
 
-    /// Get the target feature string (e.g., "compute_35" for Compute35, "compute_90a" for Compute90a)
+    /// Get the target feature string (e.g., "compute_50" for `Compute50`, "compute_90a" for
+    /// `Compute90a`).
     pub fn target_feature(&self) -> &'static str {
         match self {
-            Self::Compute35 => "compute_35",
-            Self::Compute37 => "compute_37",
             Self::Compute50 => "compute_50",
             Self::Compute52 => "compute_52",
             Self::Compute53 => "compute_53",
@@ -450,10 +458,12 @@ impl NvvmArch {
             Self::Compute62 => "compute_62",
             Self::Compute70 => "compute_70",
             Self::Compute72 => "compute_72",
+            Self::Compute73 => "compute_73",
             Self::Compute75 => "compute_75",
             Self::Compute80 => "compute_80",
             Self::Compute86 => "compute_86",
             Self::Compute87 => "compute_87",
+            Self::Compute88 => "compute_88",
             Self::Compute89 => "compute_89",
             Self::Compute90 => "compute_90",
             Self::Compute90a => "compute_90a",
@@ -466,6 +476,9 @@ impl NvvmArch {
             Self::Compute103 => "compute_103",
             Self::Compute103f => "compute_103f",
             Self::Compute103a => "compute_103a",
+            Self::Compute110 => "compute_110",
+            Self::Compute110f => "compute_110f",
+            Self::Compute110a => "compute_110a",
             Self::Compute120 => "compute_120",
             Self::Compute120f => "compute_120f",
             Self::Compute120a => "compute_120a",
@@ -489,10 +502,10 @@ impl NvvmArch {
     ///
     /// ```
     /// use nvvm::NvvmArch::*;
-    /// let features = Compute53.all_target_features();
+    /// let features = Compute61.all_target_features();
     /// assert_eq!(
     ///     features,
-    ///     vec![Compute35, Compute37, Compute50, Compute52, Compute53]
+    ///     vec![Compute50, Compute52, Compute53, Compute60, Compute61]
     /// );
     /// ```
     ///
@@ -558,9 +571,7 @@ impl NvvmArch {
     pub fn base_architecture(&self) -> Self {
         match self {
             // Already base variants
-            Self::Compute35
-            | Self::Compute37
-            | Self::Compute50
+            Self::Compute50
             | Self::Compute52
             | Self::Compute53
             | Self::Compute60
@@ -568,15 +579,18 @@ impl NvvmArch {
             | Self::Compute62
             | Self::Compute70
             | Self::Compute72
+            | Self::Compute73
             | Self::Compute75
             | Self::Compute80
             | Self::Compute86
             | Self::Compute87
+            | Self::Compute88
             | Self::Compute89
             | Self::Compute90
             | Self::Compute100
             | Self::Compute101
             | Self::Compute103
+            | Self::Compute110
             | Self::Compute120
             | Self::Compute121 => *self,
 
@@ -584,6 +598,7 @@ impl NvvmArch {
             Self::Compute100f => Self::Compute100,
             Self::Compute101f => Self::Compute101,
             Self::Compute103f => Self::Compute103,
+            Self::Compute110f => Self::Compute110,
             Self::Compute120f => Self::Compute120,
             Self::Compute121f => Self::Compute121,
 
@@ -592,6 +607,7 @@ impl NvvmArch {
             Self::Compute100a => Self::Compute100,
             Self::Compute101a => Self::Compute101,
             Self::Compute103a => Self::Compute103,
+            Self::Compute110a => Self::Compute110,
             Self::Compute120a => Self::Compute120,
             Self::Compute121a => Self::Compute121,
         }
@@ -733,8 +749,6 @@ mod tests {
 
     #[test]
     fn nvvm_arch_capability_value() {
-        assert_eq!(Compute35.capability_value(), 35);
-        assert_eq!(Compute37.capability_value(), 37);
         assert_eq!(Compute50.capability_value(), 50);
         assert_eq!(Compute52.capability_value(), 52);
         assert_eq!(Compute53.capability_value(), 53);
@@ -743,19 +757,37 @@ mod tests {
         assert_eq!(Compute62.capability_value(), 62);
         assert_eq!(Compute70.capability_value(), 70);
         assert_eq!(Compute72.capability_value(), 72);
+        assert_eq!(Compute73.capability_value(), 73);
         assert_eq!(Compute75.capability_value(), 75);
         assert_eq!(Compute80.capability_value(), 80);
         assert_eq!(Compute86.capability_value(), 86);
         assert_eq!(Compute87.capability_value(), 87);
+        assert_eq!(Compute88.capability_value(), 88);
         assert_eq!(Compute89.capability_value(), 89);
         assert_eq!(Compute90.capability_value(), 90);
+        assert_eq!(Compute90a.capability_value(), 90);
+        assert_eq!(Compute100.capability_value(), 100);
+        assert_eq!(Compute100f.capability_value(), 100);
+        assert_eq!(Compute100a.capability_value(), 100);
+        assert_eq!(Compute101.capability_value(), 101);
+        assert_eq!(Compute101f.capability_value(), 101);
+        assert_eq!(Compute101a.capability_value(), 101);
+        assert_eq!(Compute103.capability_value(), 103);
+        assert_eq!(Compute103f.capability_value(), 103);
+        assert_eq!(Compute103a.capability_value(), 103);
+        assert_eq!(Compute110.capability_value(), 110);
+        assert_eq!(Compute110f.capability_value(), 110);
+        assert_eq!(Compute110a.capability_value(), 110);
+        assert_eq!(Compute120.capability_value(), 120);
+        assert_eq!(Compute120f.capability_value(), 120);
+        assert_eq!(Compute120a.capability_value(), 120);
     }
 
     #[test]
     fn nvvm_arch_major_minor_version() {
         // Test major/minor version extraction
-        assert_eq!(Compute35.major_version(), 3);
-        assert_eq!(Compute35.minor_version(), 5);
+        assert_eq!(Compute53.major_version(), 5);
+        assert_eq!(Compute53.minor_version(), 3);
 
         assert_eq!(Compute70.major_version(), 7);
         assert_eq!(Compute70.minor_version(), 0);
@@ -774,7 +806,7 @@ mod tests {
     #[test]
     fn nvvm_arch_target_feature() {
         // Test baseline features
-        assert_eq!(Compute35.target_feature(), "compute_35");
+        assert_eq!(Compute50.target_feature(), "compute_50");
         assert_eq!(Compute61.target_feature(), "compute_61");
         assert_eq!(Compute90.target_feature(), "compute_90");
         assert_eq!(Compute100.target_feature(), "compute_100");
@@ -798,32 +830,18 @@ mod tests {
 
     #[test]
     fn nvvm_arch_all_target_features() {
-        assert_eq!(Compute35.all_target_features(), vec![Compute35]);
-
-        assert_eq!(
-            Compute50.all_target_features(),
-            vec![Compute35, Compute37, Compute50],
-        );
-
-        assert_eq!(
-            Compute61.all_target_features(),
-            vec![Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61]
-        );
+        assert_eq!(Compute50.all_target_features(), vec![Compute50]);
 
         assert_eq!(
             Compute70.all_target_features(),
-            vec![
-                Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61,
-                Compute62, Compute70,
-            ]
+            vec![Compute50, Compute52, Compute53, Compute60, Compute61, Compute62, Compute70]
         );
 
-        let compute90_features = Compute90.all_target_features();
         assert_eq!(
-            compute90_features,
+            Compute90.all_target_features(),
             vec![
-                Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61,
-                Compute62, Compute70, Compute72, Compute75, Compute80, Compute86, Compute87,
+                Compute50, Compute52, Compute53, Compute60, Compute61, Compute62, Compute70,
+                Compute72, Compute73, Compute75, Compute80, Compute86, Compute87, Compute88,
                 Compute89, Compute90,
             ]
         );
@@ -831,8 +849,8 @@ mod tests {
         assert_eq!(
             Compute90a.all_target_features(),
             vec![
-                Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61,
-                Compute62, Compute70, Compute72, Compute75, Compute80, Compute86, Compute87,
+                Compute50, Compute52, Compute53, Compute60, Compute61, Compute62, Compute70,
+                Compute72, Compute73, Compute75, Compute80, Compute86, Compute87, Compute88,
                 Compute89, Compute90, Compute90a,
             ]
         );
@@ -840,8 +858,6 @@ mod tests {
         assert_eq!(
             Compute100a.all_target_features(),
             vec![
-                Compute35,
-                Compute37,
                 Compute50,
                 Compute52,
                 Compute53,
@@ -850,10 +866,12 @@ mod tests {
                 Compute62,
                 Compute70,
                 Compute72,
+                Compute73,
                 Compute75,
                 Compute80,
                 Compute86,
                 Compute87,
+                Compute88,
                 Compute89,
                 Compute90,
                 Compute100,
@@ -865,8 +883,6 @@ mod tests {
         assert_eq!(
             Compute100f.all_target_features(),
             vec![
-                Compute35,
-                Compute37,
                 Compute50,
                 Compute52,
                 Compute53,
@@ -875,10 +891,12 @@ mod tests {
                 Compute62,
                 Compute70,
                 Compute72,
+                Compute73,
                 Compute75,
                 Compute80,
                 Compute86,
                 Compute87,
+                Compute88,
                 Compute89,
                 Compute90,
                 Compute100,
@@ -889,8 +907,6 @@ mod tests {
         assert_eq!(
             Compute101a.all_target_features(),
             vec![
-                Compute35,
-                Compute37,
                 Compute50,
                 Compute52,
                 Compute53,
@@ -899,10 +915,12 @@ mod tests {
                 Compute62,
                 Compute70,
                 Compute72,
+                Compute73,
                 Compute75,
                 Compute80,
                 Compute86,
                 Compute87,
+                Compute88,
                 Compute89,
                 Compute90,
                 Compute100,
@@ -916,8 +934,6 @@ mod tests {
         assert_eq!(
             Compute101f.all_target_features(),
             vec![
-                Compute35,
-                Compute37,
                 Compute50,
                 Compute52,
                 Compute53,
@@ -926,10 +942,12 @@ mod tests {
                 Compute62,
                 Compute70,
                 Compute72,
+                Compute73,
                 Compute75,
                 Compute80,
                 Compute86,
                 Compute87,
+                Compute88,
                 Compute89,
                 Compute90,
                 Compute100,
@@ -942,17 +960,15 @@ mod tests {
         assert_eq!(
             Compute120.all_target_features(),
             vec![
-                Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61,
-                Compute62, Compute70, Compute72, Compute75, Compute80, Compute86, Compute87,
-                Compute89, Compute90, Compute100, Compute101, Compute103, Compute120,
+                Compute50, Compute52, Compute53, Compute60, Compute61, Compute62, Compute70,
+                Compute72, Compute73, Compute75, Compute80, Compute86, Compute87, Compute88,
+                Compute89, Compute90, Compute100, Compute101, Compute103, Compute110, Compute120,
             ]
         );
 
         assert_eq!(
             Compute120f.all_target_features(),
             vec![
-                Compute35,
-                Compute37,
                 Compute50,
                 Compute52,
                 Compute53,
@@ -961,15 +977,18 @@ mod tests {
                 Compute62,
                 Compute70,
                 Compute72,
+                Compute73,
                 Compute75,
                 Compute80,
                 Compute86,
                 Compute87,
+                Compute88,
                 Compute89,
                 Compute90,
                 Compute100,
                 Compute101,
                 Compute103,
+                Compute110,
                 Compute120,
                 Compute120f,
             ]
@@ -978,8 +997,6 @@ mod tests {
         assert_eq!(
             Compute120a.all_target_features(),
             vec![
-                Compute35,
-                Compute37,
                 Compute50,
                 Compute52,
                 Compute53,
@@ -988,15 +1005,18 @@ mod tests {
                 Compute62,
                 Compute70,
                 Compute72,
+                Compute73,
                 Compute75,
                 Compute80,
                 Compute86,
                 Compute87,
+                Compute88,
                 Compute89,
                 Compute90,
                 Compute100,
                 Compute101,
                 Compute103,
+                Compute110,
                 Compute120,
                 Compute120f,
                 Compute120a,
@@ -1011,8 +1031,6 @@ mod tests {
         let ok = |opt, val| assert_eq!(NvvmOption::from_str(opt), Ok(val));
         let err = |opt, s: &str| assert_eq!(NvvmOption::from_str(opt), Err(s.to_string()));
 
-        ok("-arch=compute_35", Arch(Compute35));
-        ok("-arch=compute_37", Arch(Compute37));
         ok("-arch=compute_50", Arch(Compute50));
         ok("-arch=compute_52", Arch(Compute52));
         ok("-arch=compute_53", Arch(Compute53));
@@ -1021,10 +1039,12 @@ mod tests {
         ok("-arch=compute_62", Arch(Compute62));
         ok("-arch=compute_70", Arch(Compute70));
         ok("-arch=compute_72", Arch(Compute72));
+        ok("-arch=compute_73", Arch(Compute73));
         ok("-arch=compute_75", Arch(Compute75));
         ok("-arch=compute_80", Arch(Compute80));
         ok("-arch=compute_86", Arch(Compute86));
         ok("-arch=compute_87", Arch(Compute87));
+        ok("-arch=compute_88", Arch(Compute88));
         ok("-arch=compute_89", Arch(Compute89));
         ok("-arch=compute_90", Arch(Compute90));
         ok("-arch=compute_90a", Arch(Compute90a));
@@ -1034,6 +1054,9 @@ mod tests {
         ok("-arch=compute_101", Arch(Compute101));
         ok("-arch=compute_101f", Arch(Compute101f));
         ok("-arch=compute_101a", Arch(Compute101a));
+        ok("-arch=compute_110", Arch(Compute110));
+        ok("-arch=compute_110f", Arch(Compute110f));
+        ok("-arch=compute_110a", Arch(Compute110a));
         ok("-arch=compute_120", Arch(Compute120));
         ok("-arch=compute_120f", Arch(Compute120f));
         ok("-arch=compute_120a", Arch(Compute120a));
@@ -1116,7 +1139,7 @@ mod tests {
         // Capability with multiple variants
         assert_eq!(
             NvvmArch::variants_for_capability(101),
-            vec![Compute101, Compute101f, Compute101a,]
+            vec![Compute101, Compute101f, Compute101a]
         );
 
         // Non-existent capability
diff --git a/guide/src/guide/compute_capabilities.md b/guide/src/guide/compute_capabilities.md
index 562be2f0..0fd30dce 100644
--- a/guide/src/guide/compute_capabilities.md
+++ b/guide/src/guide/compute_capabilities.md
@@ -190,7 +190,7 @@ These patterns work when using base architectures (no suffix), which enable all
 
 ```rust,no_run
 // Code that works up to compute 6.0 (not 6.1+)
-#[cfg(all(target_feature = "compute_35", not(target_feature = "compute_61")))]
+#[cfg(not(target_feature = "compute_61"))]
 {
     // Maximum compatibility implementation
 }

From 568b894f3c7b8b7e2366a891af5d945fa2e0b562 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Tue, 25 Nov 2025 10:53:44 +1100
Subject: [PATCH 2/3] Remove `ComputeCapability` and `CUDA_ARCH`.

CUDA C++ has the `__CUDA_ARCH__` macro for conditional compilation.
rust-cuda has a `CUDA_ARCH` environment variable that is similar, and
the `from_cuda_arch_env` method parses the environment variable's value
to produce a value of type `ComputeCapability`, which can be queried for
conditional compilation.

But `ComputeCapability` has a big problem. It's missing all the
capabilities after 80, including the 'a' and 'f' suffix ones. We could
just add them, but it implements `PartialOrd`/`Ord` and uses ordering to
determine feature availability. This was valid before the 'a' and 'f'
suffixes were added but is no longer, because some pairs of values are
incomparable. E.g. `100a` and `101a` -- each one has some features the
other doesn't, so neither is clearly larger than the other, and they're
also not equal.

So, what to do? Well, `CUDA_ARCH` was added in 2022. More recently,
another mechanism for conditional compilation was added:
`target_feature`, in #239. This does work with the 'a' and 'f' suffix
targets, and it's more Rust-y.

So this commit just removes `CUDA_ARCH` and `ComputeCapability`
(removing two more places where the default compilation target is
specified) and changes the only uses (in `cuda_std/src/atomic/mid.rs`)
to use `target_feature` instead. We don't have any tests exercising
conditional compilation, alas, but I did some manual checking locally to
verify that it works the same.
---
 crates/cuda_builder/src/lib.rs     |  3 --
 crates/cuda_std/src/atomic/mid.rs  | 19 +++++-------
 crates/cuda_std/src/cfg.rs         | 48 ------------------------------
 crates/cuda_std/src/lib.rs         |  1 -
 crates/rustc_codegen_nvvm/build.rs |  6 ----
 tests/compiletests/src/main.rs     |  1 -
 6 files changed, 7 insertions(+), 71 deletions(-)
 delete mode 100644 crates/cuda_std/src/cfg.rs

diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs
index f187ede2..f54775c9 100644
--- a/crates/cuda_builder/src/lib.rs
+++ b/crates/cuda_builder/src/lib.rs
@@ -809,9 +809,6 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
         }
     }
 
-    let arch = format!("{:?}0", builder.arch);
-    cargo.env("CUDA_ARCH", arch.strip_prefix("Compute").unwrap());
-
     let cargo_encoded_rustflags = join_checking_for_separators(rustflags, "\x1f");
 
     let build = cargo
diff --git a/crates/cuda_std/src/atomic/mid.rs b/crates/cuda_std/src/atomic/mid.rs
index af72f070..ad451bff 100644
--- a/crates/cuda_std/src/atomic/mid.rs
+++ b/crates/cuda_std/src/atomic/mid.rs
@@ -7,19 +7,14 @@
 #![allow(dead_code, unused_imports)]
 
 use super::intrinsics;
-use crate::cfg::ComputeCapability;
 use crate::gpu_only;
 use core::sync::atomic::Ordering::{self, *};
 use paste::paste;
 
-fn ge_sm70() -> bool {
-    ComputeCapability::from_cuda_arch_env() >= ComputeCapability::Compute70
-}
-
 #[gpu_only]
 pub fn device_thread_fence(ordering: Ordering) {
     unsafe {
-        if ge_sm70() {
+        if cfg!(target_feature = "compute_70") {
             if ordering == SeqCst {
                 return intrinsics::fence_sc_device();
             }
@@ -38,7 +33,7 @@ pub fn device_thread_fence(ordering: Ordering) {
 #[gpu_only]
 pub fn block_thread_fence(ordering: Ordering) {
     unsafe {
-        if ge_sm70() {
+        if cfg!(target_feature = "compute_70") {
             if ordering == SeqCst {
                 return intrinsics::fence_sc_block();
             }
@@ -57,7 +52,7 @@ pub fn block_thread_fence(ordering: Ordering) {
 #[gpu_only]
 pub fn system_thread_fence(ordering: Ordering) {
     unsafe {
-        if ge_sm70() {
+        if cfg!(target_feature = "compute_70") {
             if ordering == SeqCst {
                 return intrinsics::fence_sc_system();
             }
@@ -80,7 +75,7 @@ macro_rules! load {
                 #[$crate::gpu_only]
                 #[allow(clippy::missing_safety_doc)]
                 pub unsafe fn [<atomic_load_ $width _ $scope>](ptr: *mut $type, ordering: Ordering) -> $type {
-                    if ge_sm70() {
+                    if cfg!(target_feature = "compute_70") {
                         match ordering {
                             SeqCst => {
                                 intrinsics::[<fence_sc_ $scope>]();
@@ -136,7 +131,7 @@ macro_rules! store {
                 #[$crate::gpu_only]
                 #[allow(clippy::missing_safety_doc)]
                 pub unsafe fn [<atomic_store_ $width _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) {
-                    if ge_sm70() {
+                    if cfg!(target_feature = "compute_70") {
                         match ordering {
                             SeqCst => {
                                 intrinsics::[<fence_sc_ $scope>]();
@@ -185,7 +180,7 @@ macro_rules! inner_fetch_ops_1_param {
                 #[$crate::gpu_only]
                 #[allow(clippy::missing_safety_doc)]
                 pub unsafe fn [<atomic_fetch_ $op _ $type _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) -> $type {
-                    if ge_sm70() {
+                    if cfg!(target_feature = "compute_70") {
                         match ordering {
                             SeqCst => {
                                 intrinsics::[<fence_sc_ $scope>]();
@@ -259,7 +254,7 @@ macro_rules! inner_cas {
                 #[$crate::gpu_only]
                 #[allow(clippy::missing_safety_doc)]
                 pub unsafe fn [<atomic_compare_and_swap_ $type _ $scope>](ptr: *mut $type, current: $type, new: $type, ordering: Ordering) -> $type {
-                    if ge_sm70() {
+                    if cfg!(target_feature = "compute_70") {
                         match ordering {
                             SeqCst => {
                                 intrinsics::[<fence_sc_ $scope>]();
diff --git a/crates/cuda_std/src/cfg.rs b/crates/cuda_std/src/cfg.rs
deleted file mode 100644
index b029e776..00000000
--- a/crates/cuda_std/src/cfg.rs
+++ /dev/null
@@ -1,48 +0,0 @@
-//! Utilities for configuring code based on the specified compute capability.
-
-use cuda_std_macros::gpu_only;
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
-pub enum ComputeCapability {
-    Compute35,
-    Compute37,
-    Compute50,
-    Compute52,
-    Compute53,
-    Compute60,
-    Compute61,
-    Compute62,
-    Compute70,
-    Compute72,
-    Compute75,
-    Compute80,
-}
-
-impl ComputeCapability {
-    /// Parses a compute capability from the `CUDA_ARCH` environment variable set by `cuda_builder`.
-    /// This is a compile-time variable so any comparisons of the compute capability should expand to constant
-    /// values.
-    ///
-    /// This allows you to use the current capability to decide what path to take in a function with the incorrect
-    /// path being optimized away.
-    #[gpu_only]
-    #[inline(always)]
-    pub fn from_cuda_arch_env() -> Self {
-        // set by cuda_builder
-        match env!("CUDA_ARCH") {
-            "350" => ComputeCapability::Compute35,
-            "370" => ComputeCapability::Compute37,
-            "500" => ComputeCapability::Compute50,
-            "520" => ComputeCapability::Compute52,
-            "530" => ComputeCapability::Compute53,
-            "600" => ComputeCapability::Compute60,
-            "610" => ComputeCapability::Compute61,
-            "620" => ComputeCapability::Compute62,
-            "700" => ComputeCapability::Compute70,
-            "720" => ComputeCapability::Compute72,
-            "750" => ComputeCapability::Compute75,
-            "800" => ComputeCapability::Compute80,
-            _ => panic!("CUDA_ARCH had an invalid value"),
-        }
-    }
-}
diff --git a/crates/cuda_std/src/lib.rs b/crates/cuda_std/src/lib.rs
index 8aef4d74..0cb671bf 100644
--- a/crates/cuda_std/src/lib.rs
+++ b/crates/cuda_std/src/lib.rs
@@ -43,7 +43,6 @@ pub mod misc;
 // WIP
 // pub mod rt;
 pub mod atomic;
-pub mod cfg;
 pub mod ptr;
 pub mod shared;
 pub mod thread;
diff --git a/crates/rustc_codegen_nvvm/build.rs b/crates/rustc_codegen_nvvm/build.rs
index ba453722..8bb25990 100644
--- a/crates/rustc_codegen_nvvm/build.rs
+++ b/crates/rustc_codegen_nvvm/build.rs
@@ -17,12 +17,6 @@ static REQUIRED_MAJOR_LLVM_VERSION: u8 = 7;
 
 fn main() {
     rustc_llvm_build();
-
-    // this is set by cuda_builder, but in case somebody is using the codegen
-    // manually, default to 520 (which is what nvvm defaults to).
-    if option_env!("CUDA_ARCH").is_none() {
-        println!("cargo:rustc-env=CUDA_ARCH=520")
-    }
 }
 
 fn fail(s: &str) -> ! {
diff --git a/tests/compiletests/src/main.rs b/tests/compiletests/src/main.rs
index a4614c4e..0e345813 100644
--- a/tests/compiletests/src/main.rs
+++ b/tests/compiletests/src/main.rs
@@ -241,7 +241,6 @@ fn build_deps(
         .arg("--target-dir")
         .arg(deps_target_dir)
         .env("CARGO_ENCODED_RUSTFLAGS", cargo_encoded_rustflags)
-        .env("CUDA_ARCH", "70")
         .stderr(std::process::Stdio::inherit())
         .stdout(std::process::Stdio::inherit())
         .status()

From e0dbda7bfeee350a88ee833b110d4f0280358ce4 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Tue, 25 Nov 2025 11:38:28 +1100
Subject: [PATCH 3/3] Remove `JitTarget`.

It includes some now-unsupported targets and is also missing some new
targets. The obvious thing to do is update it, but it's simpler and
better to recognize that it's identical to the generated
`driver_sys::CUjit_target` type and instead use that generated type
directly, avoiding the need for manual updating in the future. This is
especially worthwhile given that there is a non-trivial encoding for
targets with 'a' and 'f' suffixes (which involves adding 2^16 and 2^17,
respectively, to the base number).

This seems fine because this `ModuleJitOption` type is obscure and has
no existing uses in the codebase.
---
 crates/cust/src/module.rs | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/crates/cust/src/module.rs b/crates/cust/src/module.rs
index 772f4a18..b7efd93c 100644
--- a/crates/cust/src/module.rs
+++ b/crates/cust/src/module.rs
@@ -34,30 +34,6 @@ pub enum OptLevel {
     O4 = 4,
 }
 
-/// The possible targets when JIT compiling a PTX module.
-#[non_exhaustive]
-#[repr(u32)]
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum JitTarget {
-    Compute20 = 20,
-    Compute21 = 21,
-    Compute30 = 30,
-    Compute32 = 32,
-    Compute35 = 35,
-    Compute37 = 37,
-    Compute50 = 50,
-    Compute52 = 52,
-    Compute53 = 53,
-    Compute60 = 60,
-    Compute61 = 61,
-    Compute62 = 62,
-    Compute70 = 70,
-    Compute72 = 72,
-    Compute75 = 75,
-    Compute80 = 80,
-    Compute86 = 86,
-}
-
 /// How to handle cases where a loaded module's data does not contain an exact match for the
 /// specified architecture.
 #[repr(u32)]
@@ -82,7 +58,7 @@ pub enum ModuleJitOption {
     /// [`ModuleJitOption::Target`].
     DetermineTargetFromContext,
     /// Specifies the target for the JIT compiler. Cannot be combined with [`ModuleJitOption::DetermineTargetFromContext`].
-    Target(JitTarget),
+    Target(driver_sys::CUjit_target),
     /// Specifies how to handle cases where a loaded module's data does not have an exact match for the specified
     /// architecture.
     Fallback(JitFallback),