diff --git a/crates/core_arch/src/nvptx/mod.rs b/crates/core_arch/src/nvptx/mod.rs
index 3df767cc7f..b971a32bc8 100644
--- a/crates/core_arch/src/nvptx/mod.rs
+++ b/crates/core_arch/src/nvptx/mod.rs
@@ -13,6 +13,11 @@
 
 use crate::ffi::c_void;
 
+mod packed;
+
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub use packed::*;
+
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.nvvm.barrier0"]
diff --git a/crates/core_arch/src/nvptx/packed.rs b/crates/core_arch/src/nvptx/packed.rs
new file mode 100644
index 0000000000..c465b0cc61
--- /dev/null
+++ b/crates/core_arch/src/nvptx/packed.rs
@@ -0,0 +1,93 @@
+//! NVPTX Packed data types (SIMD)
+//!
+//! Packed Data Types is what PTX calls SIMD types. See [PTX ISA (Packed Data Types)](https://docs.nvidia.com/cuda/parallel-thread-execution/#packed-data-types) for a full reference.
+
+// Note: #[assert_instr] tests are not actually being run on nvptx due to being a `no_std` target incapable of running tests. Something like FileCheck would be appropriate for verifying the correct instruction is used.
+
+use crate::intrinsics::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.minnum.v2f16"]
+    fn llvm_f16x2_min(a: f16x2, b: f16x2) -> f16x2;
+    #[link_name = "llvm.maxnum.v2f16"]
+    fn llvm_f16x2_max(a: f16x2, b: f16x2) -> f16x2;
+}
+
+types! {
+    #![unstable(feature = "stdarch_nvptx", issue = "111199")]
+
+    /// PTX-specific 32-bit wide floating point (f16 x 2) vector type
+    pub struct f16x2(2 x f16);
+
+}
+
+/// Add two values
+///
+/// Delegates to the `simd_add` intrinsic; expected to lower to PTX `add.rn.f16x2`.
+#[inline]
+#[cfg_attr(test, assert_instr(add.rn.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_add(a: f16x2, b: f16x2) -> f16x2 {
+    simd_add(a, b)
+}
+
+/// Subtract two values
+///
+/// Delegates to the `simd_sub` intrinsic; expected to lower to PTX `sub.rn.f16x2`.
+#[inline]
+#[cfg_attr(test, assert_instr(sub.rn.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_sub(a: f16x2, b: f16x2) -> f16x2 {
+    simd_sub(a, b)
+}
+
+/// Multiply two values
+///
+/// Delegates to the `simd_mul` intrinsic; expected to lower to PTX `mul.rn.f16x2`.
+#[inline]
+#[cfg_attr(test, assert_instr(mul.rn.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_mul(a: f16x2, b: f16x2) -> f16x2 {
+    simd_mul(a, b)
+}
+
+/// Fused multiply-add
+///
+/// Delegates to `simd_fma(a, b, c)`; expected to lower to PTX `fma.rn.f16x2`.
+#[inline]
+#[cfg_attr(test, assert_instr(fma.rn.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_fma(a: f16x2, b: f16x2, c: f16x2) -> f16x2 {
+    simd_fma(a, b, c)
+}
+
+/// Arithmetic negate
+///
+/// Delegates to the `simd_neg` intrinsic; expected to lower to PTX `neg.f16x2`.
+#[inline]
+#[cfg_attr(test, assert_instr(neg.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_neg(a: f16x2) -> f16x2 {
+    simd_neg(a)
+}
+
+/// Find the minimum of two values
+///
+/// Calls the LLVM `llvm.minnum.v2f16` intrinsic; expected to lower to PTX `min.f16x2`.
+#[inline]
+#[cfg_attr(test, assert_instr(min.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_min(a: f16x2, b: f16x2) -> f16x2 {
+    llvm_f16x2_min(a, b)
+}
+
+/// Find the maximum of two values
+///
+/// Calls the LLVM `llvm.maxnum.v2f16` intrinsic; expected to lower to PTX `max.f16x2`.
+#[inline]
+#[cfg_attr(test, assert_instr(max.f16x2))]
+#[unstable(feature = "stdarch_nvptx", issue = "111199")]
+pub unsafe fn f16x2_max(a: f16x2, b: f16x2) -> f16x2 {
+    llvm_f16x2_max(a, b)
+}