Skip to content

Commit 85b1aef

Browse files
brandonros and codex committed
feat(llvm): add llvm19 support
Add the initial llvm19 cargo/build.rs plumbing while preserving the llvm7 check path. Assemble a v19 libintrinsics bitcode at build time and route nvvm.rs through the build-script-provided path.

Document the validated baseline on the current host and the first Layer 1 blocker: the existing C++ shim no longer builds unchanged against LLVM 19 because rustllvm.h still expects headers like llvm/ADT/Triple.h.

RUST_CUDA_ALLOW_LEGACY_ARCH_WITH_LLVM19 compute_100 target working through compilation errors working through sigsegv on vecadd nix flake libintrinsics libintrinsics

chore(llvm19): close out Layer 3 pre-smoke work

Finalize the Layer 3 plan, add env-driven final-module and LLVM IR capture hooks to vecadd, and validate the harness locally so the next phase can move straight to CUDA 12.9+ smoke testing.

refactor(llvm19): close out Layer 2 containment

Add named Rust-side containment helpers for debug info and target machine creation, make the current ThinLTO behavior explicit, and update LLVM19_PLAN.md to mark Layers 2c and 2d complete.

refactor(llvm19): start Layer 2 helper containment

Add a small Rust-side helper surface in src/llvm.rs for call-building, symbol insertion, and debug-location setting, then migrate the obvious callers without introducing LLVM-version cfg branching. Update LLVM19_PLAN.md to reflect the real Layer 2 state: 2a is complete, 2b is complete, 2c is partially landed, and 2d is still pending. Include the current .gitignore change in this checkpoint as requested.

feat(llvm19): complete Layer 1 C++ shim bridge

Bridge the wrapper headers and C++ shims so rustc_codegen_nvvm now builds against both LLVM 7 and LLVM 19. This adds the LLVM 19 wrapper headers, ports RustWrapper.cpp and PassWrapper.cpp through the current checkpoint, and records the completed Layer 1 progress and remaining Layer 2 caveats in the plan.

Co-Authored-By: OpenAI Codex <codex@openai.com>
1 parent 0bc607f commit 85b1aef

31 files changed

Lines changed: 1762 additions & 258 deletions

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@ book
22
/target
33
**/.vscode
44
.devcontainer
5+
.codex
6+
rustc-ice-*.txt
7+
.nix-driver-libs
8+
.claude

crates/cuda_builder/src/lib.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,21 @@ pub struct CudaBuilder {
196196
pub final_module_path: Option<PathBuf>,
197197
}
198198

199+
/// Default arch for new `CudaBuilder`s.
200+
///
201+
/// When the backend is being built with LLVM 19 support (detected via the `LLVM_CONFIG_19`
202+
/// env var — the same signal `rustc_codegen_nvvm`'s build script uses), default to the
203+
/// lowest Blackwell compute capability (`Compute100`). Pre-Blackwell archs use the legacy
204+
/// LLVM 7 NVVM dialect, so pairing them with an LLVM 19 backend is never the right choice.
205+
/// Callers can still override via [`CudaBuilder::arch`].
206+
fn default_arch() -> NvvmArch {
207+
if env::var_os("LLVM_CONFIG_19").is_some() {
208+
NvvmArch::Compute100
209+
} else {
210+
NvvmArch::default()
211+
}
212+
}
213+
199214
impl CudaBuilder {
200215
pub fn new(path_to_crate_root: impl AsRef<Path>) -> Self {
201216
Self {
@@ -204,7 +219,7 @@ impl CudaBuilder {
204219
ptx_file_copy_path: None,
205220
generate_line_info: true,
206221
nvvm_opts: true,
207-
arch: NvvmArch::default(),
222+
arch: default_arch(),
208223
ftz: false,
209224
fast_sqrt: false,
210225
fast_div: false,
@@ -355,6 +370,7 @@ impl CudaBuilder {
355370
/// ptx file. If [`ptx_file_copy_path`](Self::ptx_file_copy_path) is set, this returns the copied path.
356371
pub fn build(self) -> Result<PathBuf, CudaBuilderError> {
357372
println!("cargo:rerun-if-changed={}", self.path_to_crate.display());
373+
println!("cargo:rerun-if-env-changed=LLVM_CONFIG_19");
358374
let path = invoke_rustc(&self)?;
359375
if let Some(copy_path) = self.ptx_file_copy_path {
360376
std::fs::copy(path, &copy_path).map_err(CudaBuilderError::FailedToCopyPtxFile)?;

crates/cust/src/memory/unified.rs

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,29 @@ use crate::memory::UnifiedPointer;
1919
use crate::memory::malloc::{cuda_free_unified, cuda_malloc_unified};
2020
use crate::prelude::Stream;
2121

22+
#[cfg(any(cuMemPrefetchAsync_v2, cuMemAdvise_v2))]
23+
unsafe fn cu_mem_location(
24+
type_: driver_sys::CUmemLocationType,
25+
id: std::os::raw::c_int,
26+
) -> driver_sys::CUmemLocation {
27+
let mut location = std::mem::MaybeUninit::<driver_sys::CUmemLocation>::zeroed();
28+
let location_ptr = location.as_mut_ptr();
29+
30+
// Support both older bindgen output (`{ type_, id }`) and the newer
31+
// anonymous-union layout emitted from CUDA 13.2 headers.
32+
unsafe {
33+
(*location_ptr).type_ = type_;
34+
std::ptr::write(
35+
(location_ptr.cast::<u8>())
36+
.add(std::mem::size_of::<driver_sys::CUmemLocationType>())
37+
.cast::<std::os::raw::c_int>(),
38+
id,
39+
);
40+
41+
location.assume_init()
42+
}
43+
}
44+
2245
/// A pointer type for heap-allocation in CUDA unified memory.
2346
///
2447
/// See the [`module-level documentation`](../memory/index.html) for more information on unified
@@ -640,17 +663,13 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
640663
let mem_size = std::mem::size_of_val(slice);
641664

642665
unsafe {
643-
let id = -1; // -1 is CU_DEVICE_CPU
644666
driver_sys::cuMemPrefetchAsync(
645667
slice.as_ptr() as driver_sys::CUdeviceptr,
646668
mem_size,
647669
#[cfg(cuMemPrefetchAsync_v2)]
648-
driver_sys::CUmemLocation {
649-
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
650-
id,
651-
},
670+
cu_mem_location(driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_HOST, 0),
652671
#[cfg(not(cuMemPrefetchAsync_v2))]
653-
id,
672+
-1, // -1 is CU_DEVICE_CPU
654673
#[cfg(cuMemPrefetchAsync_v2)]
655674
0, // flags for future use, must be 0 as of CUDA 13.0
656675
stream.as_inner(),
@@ -691,10 +710,7 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
691710
slice.as_ptr() as driver_sys::CUdeviceptr,
692711
mem_size,
693712
#[cfg(cuMemPrefetchAsync_v2)]
694-
driver_sys::CUmemLocation {
695-
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
696-
id,
697-
},
713+
cu_mem_location(driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE, id),
698714
#[cfg(not(cuMemPrefetchAsync_v2))]
699715
id,
700716
#[cfg(cuMemPrefetchAsync_v2)]
@@ -727,18 +743,14 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
727743
};
728744

729745
unsafe {
730-
let id = 0;
731746
driver_sys::cuMemAdvise(
732747
slice.as_ptr() as driver_sys::CUdeviceptr,
733748
mem_size,
734749
advice,
735750
#[cfg(cuMemAdvise_v2)]
736-
driver_sys::CUmemLocation {
737-
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
738-
id,
739-
},
751+
cu_mem_location(driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_HOST, 0),
740752
#[cfg(not(cuMemAdvise_v2))]
741-
id,
753+
0,
742754
)
743755
.to_result()?;
744756
}
@@ -775,9 +787,11 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
775787
mem_size,
776788
driver_sys::CUmem_advise::CU_MEM_ADVISE_SET_PREFERRED_LOCATION,
777789
#[cfg(cuMemAdvise_v2)]
778-
driver_sys::CUmemLocation {
779-
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
780-
id,
790+
match preferred_location {
791+
Some(_) => {
792+
cu_mem_location(driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE, id)
793+
}
794+
None => cu_mem_location(driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_HOST, 0),
781795
},
782796
#[cfg(not(cuMemAdvise_v2))]
783797
id,
@@ -793,18 +807,14 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
793807
let mem_size = std::mem::size_of_val(slice);
794808

795809
unsafe {
796-
let id = 0;
797810
driver_sys::cuMemAdvise(
798811
slice.as_ptr() as driver_sys::CUdeviceptr,
799812
mem_size,
800813
driver_sys::CUmem_advise::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION,
801814
#[cfg(cuMemAdvise_v2)]
802-
driver_sys::CUmemLocation {
803-
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
804-
id,
805-
},
815+
cu_mem_location(driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_HOST, 0),
806816
#[cfg(not(cuMemAdvise_v2))]
807-
id,
817+
0,
808818
)
809819
.to_result()?;
810820
}

crates/nvvm/src/lib.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,10 @@ pub enum NvvmArch {
325325
Compute89,
326326
Compute90,
327327
Compute90a,
328+
/// First Blackwell arch and the cutoff for NVVM's modern IR dialect — everything at
329+
/// or above this capability uses the LLVM 19-flavored bitcode accepted by CUDA 12.9+
330+
/// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. This is also the default arch
331+
/// `cuda_builder` picks when the backend is built with `LLVM_CONFIG_19` set.
328332
Compute100,
329333
Compute100f,
330334
Compute100a,
@@ -448,6 +452,14 @@ impl NvvmArch {
448452
self.capability_value() % 10
449453
}
450454

455+
/// Whether this target uses NVVM's modern IR dialect rather than the legacy LLVM 7 dialect.
456+
///
457+
/// CUDA 13.2 documents the modern dialect as Blackwell-and-later only, which begins at
458+
/// `compute_100`.
459+
pub fn uses_modern_ir_dialect(&self) -> bool {
460+
self.capability_value() >= 100
461+
}
462+
451463
/// Get the target feature string (e.g., "compute_50" for `Compute50`, "compute_90a" for
452464
/// `Compute90a`).
453465
pub fn target_feature(&self) -> &'static str {
@@ -739,7 +751,24 @@ impl NvvmProgram {
739751
/// Verify the program without actually compiling it. In the case of invalid IR, you can find
740752
/// more detailed error info by calling [`compiler_log`](Self::compiler_log).
741753
pub fn verify(&self) -> Result<(), NvvmError> {
742-
unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() }
754+
self.verify_with_options(&[])
755+
}
756+
757+
/// Like [`verify`](Self::verify), but runs the verifier with the same `NvvmOption`s that will
758+
/// be passed to [`compile`](Self::compile). Passing the user-selected `-arch=compute_XXX` in
759+
/// particular matters for CUDA 12.9+ / LLVM 19 bitcode: without it the verifier can fall back
760+
/// to the legacy LLVM 7 parser and reject modern-dialect bitcode that would otherwise compile
761+
/// fine.
762+
pub fn verify_with_options(&self, options: &[NvvmOption]) -> Result<(), NvvmError> {
763+
unsafe {
764+
let options = options.iter().map(|x| format!("{x}\0")).collect::<Vec<_>>();
765+
let mut options_ptr = options
766+
.iter()
767+
.map(|x| x.as_ptr().cast())
768+
.collect::<Vec<_>>();
769+
nvvm_sys::nvvmVerifyProgram(self.raw, options.len() as i32, options_ptr.as_mut_ptr())
770+
.to_result()
771+
}
743772
}
744773
}
745774

crates/rustc_codegen_nvvm/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ readme = "../../README.md"
1414
[lib]
1515
crate-type = ["dylib"]
1616

17+
[features]
18+
default = []
19+
llvm19 = []
20+
1721
[dependencies]
1822
nvvm = { version = "0.1", path = "../nvvm" }
1923
rustc-demangle = "0.1.24"

0 commit comments

Comments
 (0)