From 5a8443a3d344b5b1fb2383f40122e7fd43c77d75 Mon Sep 17 00:00:00 2001 From: "Kevin K." Date: Tue, 19 May 2026 16:37:28 +0200 Subject: [PATCH 1/2] Clarify importance of `target-cpu` flag for offload --- src/offload/usage.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/offload/usage.md b/src/offload/usage.md index 275290fc5..0fb6d37b2 100644 --- a/src/offload/usage.md +++ b/src/offload/usage.md @@ -77,22 +77,23 @@ pub extern "gpu-kernel" fn kernel_1(x: *mut [f64; 256]) { ``` ## Compile instructions -It is important to use a clang compiler build on the same llvm as rustc. +It is important to use a clang compiler build on the same LLVM as rustc. Just calling clang without the full path will likely use your system clang, which probably will be incompatible. So either substitute clang/lld invocations below with absolute path, or set your `PATH` accordingly. -First we generate the device (gpu) code. -Replace the target-cpu with the right code for your gpu. +First we generate the device (GPU) code. + > [!IMPORTANT] + > Replace the `target-cpu` (gfx90a) with the right code for your GPU. These are often referred to as "LLVM target names"[^list]. ``` RUSTFLAGS="-Ctarget-cpu=gfx90a --emit=llvm-bc,llvm-ir -Zoffload=Device -Csave-temps -Zunstable-options" cargo +offload build -Zunstable-options -r -v --target amdgcn-amd-amdhsa -Zbuild-std=core ``` You might afterwards need to copy your target/release/deps/.bc to lib.bc for now, before the next step. -Now we generate the host (cpu) code. +Now we generate the host (CPU) code. ``` RUSTFLAGS="--emit=llvm-bc,llvm-ir -Csave-temps -Zoffload=Host=/p/lustre1/drehwald1/prog/offload/r/target/amdgcn-amd-amdhsa/release/deps/host.out -Zunstable-options" cargo +offload build -r ``` -This call also does a lot of work and generates multiple intermediate files for llvm offload. +This call also does a lot of work and generates multiple intermediate files for LLVM offload. 
While we integrated most offload steps into rustc by now, one binary invocation still remains for now: ``` @@ -100,7 +101,7 @@ While we integrated most offload steps into rustc by now, one binary invocation ``` You can try to find the paths to those files on your system. -However, I recommend to not fix the paths, but rather just re-generate them by copying a bare-mode openmp example and compiling it with your clang. +However, I recommend to not fix the paths, but rather just re-generate them by copying a bare-mode OpenMP example and compiling it with your clang. By adding `-###` to your clang invocation, you can see the invidual steps. It will show multiple steps, just look for the clang-linker-wrapper example. Make sure to still include the path to the `host.o` file, and not whatever tmp file you got when compiling your c++ example with the following call. @@ -121,3 +122,5 @@ To receive more information about the memory transfer, you can enable info print ``` LIBOMPTARGET_INFO=-1 ./main ``` + +[^list]: https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html or https://developer.nvidia.com/cuda/gpus. Alternatively, check `rustc --print target-cpus`. From f3898a708e28eaef6605b52a87714aa2ca228837 Mon Sep 17 00:00:00 2001 From: "Kevin K." Date: Tue, 19 May 2026 16:44:27 +0200 Subject: [PATCH 2/2] fix: use div with class warning --- src/offload/usage.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/offload/usage.md b/src/offload/usage.md index 0fb6d37b2..adeac2021 100644 --- a/src/offload/usage.md +++ b/src/offload/usage.md @@ -82,8 +82,13 @@ Just calling clang without the full path will likely use your system clang, whic So either substitute clang/lld invocations below with absolute path, or set your `PATH` accordingly. First we generate the device (GPU) code. - > [!IMPORTANT] - > Replace the `target-cpu` (gfx90a) with the right code for your GPU. These are often referred to as "LLVM target names"[^list]. + +
<div class="warning"> + +Replace the `target-cpu` (gfx90a) with the right code for your GPU. These are often referred to as "LLVM target names"[^list]. + +</div>
+ ``` RUSTFLAGS="-Ctarget-cpu=gfx90a --emit=llvm-bc,llvm-ir -Zoffload=Device -Csave-temps -Zunstable-options" cargo +offload build -Zunstable-options -r -v --target amdgcn-amd-amdhsa -Zbuild-std=core ```