Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support #[repr(simd)] types in input/output of s390x inline assembly #131664

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions compiler/rustc_codegen_gcc/src/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -682,9 +682,8 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister {
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::reg) => "r",
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::reg_addr) => "a",
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::freg) => "f",
InlineAsmRegClass::S390x(
S390xInlineAsmRegClass::vreg | S390xInlineAsmRegClass::areg,
) => {
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::vreg) => "v",
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::areg) => {
unreachable!("clobber-only")
}
InlineAsmRegClass::Err => unreachable!(),
Expand Down Expand Up @@ -762,7 +761,8 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
S390xInlineAsmRegClass::reg | S390xInlineAsmRegClass::reg_addr,
) => cx.type_i32(),
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::freg) => cx.type_f64(),
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::vreg | S390xInlineAsmRegClass::areg) => {
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::vreg) => cx.type_vector(cx.type_i64(), 2),
InlineAsmRegClass::S390x(S390xInlineAsmRegClass::areg) => {
unreachable!("clobber-only")
}
InlineAsmRegClass::Msp430(Msp430InlineAsmRegClass::reg) => cx.type_i16(),
Expand Down
6 changes: 4 additions & 2 deletions compiler/rustc_codegen_llvm/src/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,8 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) ->
S390x(S390xInlineAsmRegClass::reg) => "r",
S390x(S390xInlineAsmRegClass::reg_addr) => "a",
S390x(S390xInlineAsmRegClass::freg) => "f",
S390x(S390xInlineAsmRegClass::vreg | S390xInlineAsmRegClass::areg) => {
S390x(S390xInlineAsmRegClass::vreg) => "v",
S390x(S390xInlineAsmRegClass::areg) => {
unreachable!("clobber-only")
}
Msp430(Msp430InlineAsmRegClass::reg) => "r",
Expand Down Expand Up @@ -828,7 +829,8 @@ fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &'
Avr(AvrInlineAsmRegClass::reg_ptr) => cx.type_i16(),
S390x(S390xInlineAsmRegClass::reg | S390xInlineAsmRegClass::reg_addr) => cx.type_i32(),
S390x(S390xInlineAsmRegClass::freg) => cx.type_f64(),
S390x(S390xInlineAsmRegClass::vreg | S390xInlineAsmRegClass::areg) => {
S390x(S390xInlineAsmRegClass::vreg) => cx.type_vector(cx.type_i64(), 2),
S390x(S390xInlineAsmRegClass::areg) => {
unreachable!("clobber-only")
}
Msp430(Msp430InlineAsmRegClass::reg) => cx.type_i16(),
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2126,6 +2126,7 @@ symbols! {
vec_pop,
vec_with_capacity,
vecdeque_iter,
vector,
version,
vfp2,
vis,
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_target/src/asm/s390x.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ impl S390xInlineAsmRegClass {
match self {
Self::reg | Self::reg_addr => types! { _: I8, I16, I32, I64; },
Self::freg => types! { _: F32, F64; },
Self::vreg => &[],
// FIXME: I8, I16, I32, I64, F32, F64
Self::vreg => types! {
vector: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
},
Self::areg => &[],
}
}
Expand Down
47 changes: 36 additions & 11 deletions compiler/rustc_target/src/callconv/s390x.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,35 @@
// FIXME: The assumes we're using the non-vector ABI, i.e., compiling
// for a pre-z13 machine or using -mno-vx.

use crate::abi::call::{ArgAbi, FnAbi, Reg};
use crate::abi::{HasDataLayout, TyAbiInterface};
use crate::abi::call::{ArgAbi, FnAbi, Reg, RegKind};
use crate::abi::{Abi, HasDataLayout, Size, TyAbiInterface, TyAndLayout};
use crate::spec::HasTargetSpec;

fn contains_vector<'a, Ty, C>(cx: &C, layout: TyAndLayout<'a, Ty>, expected_size: Size) -> bool
where
Ty: TyAbiInterface<'a, C> + Copy,
{
match layout.abi {
Abi::Uninhabited | Abi::Scalar(_) | Abi::ScalarPair(..) => false,
Abi::Vector { .. } => layout.size == expected_size,
Abi::Aggregate { .. } => {
for i in 0..layout.fields.count() {
if contains_vector(cx, layout.field(cx, i), expected_size) {
return true;
}
}
false
}
}
}

fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
if !ret.layout.is_aggregate() && ret.layout.size.bits() <= 64 {
let size = ret.layout.size;
if size.bits() <= 128 && matches!(ret.layout.abi, Abi::Vector { .. }) {
return;
}
if !ret.layout.is_aggregate() && size.bits() <= 64 {
ret.extend_integer_width_to(64);
} else {
ret.make_indirect();
return;
}
ret.make_indirect();
}

fn classify_arg<'a, Ty, C>(cx: &C, arg: &mut ArgAbi<'a, Ty>)
Expand All @@ -32,19 +51,25 @@ where
}
return;
}
if !arg.layout.is_aggregate() && arg.layout.size.bits() <= 64 {

let size = arg.layout.size;
if size.bits() <= 128 && contains_vector(cx, arg.layout, size) {
arg.cast_to(Reg { kind: RegKind::Vector, size });
return;
}
if !arg.layout.is_aggregate() && size.bits() <= 64 {
arg.extend_integer_width_to(64);
return;
}

if arg.layout.is_single_fp_element(cx) {
match arg.layout.size.bytes() {
match size.bytes() {
4 => arg.cast_to(Reg::f32()),
8 => arg.cast_to(Reg::f64()),
_ => arg.make_indirect(),
}
} else {
match arg.layout.size.bytes() {
match size.bytes() {
1 => arg.cast_to(Reg::i8()),
2 => arg.cast_to(Reg::i16()),
4 => arg.cast_to(Reg::i32()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ pub(crate) fn target() -> Target {
base.endian = Endian::Big;
// z10 is the oldest CPU supported by LLVM
base.cpu = "z10".into();
// FIXME: The ABI implementation in abi/call/s390x.rs is for now hard-coded to assume the no-vector
// ABI. Pass the -vector feature string to LLVM to respect this assumption.
base.features = "-vector".into();
base.max_atomic_width = Some(128);
base.min_global_align = Some(16);
base.stack_probes = StackProbeType::Inline;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ pub(crate) fn target() -> Target {
base.endian = Endian::Big;
// z10 is the oldest CPU supported by LLVM
base.cpu = "z10".into();
// FIXME: The ABI implementation in abi/call/s390x.rs is for now hard-coded to assume the no-vector
// ABI. Pass the -vector feature string to LLVM to respect this assumption.
base.features = "-vector".into();
base.max_atomic_width = Some(128);
base.min_global_align = Some(16);
base.static_position_independent_executables = true;
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,7 @@ pub fn all_known_features() -> impl Iterator<Item = (&'static str, Stability)> {
const X86_FEATURES_FOR_CORRECT_VECTOR_ABI: &'static [(u64, &'static str)] =
&[(128, "sse"), (256, "avx"), (512, "avx512f")];
const AARCH64_FEATURES_FOR_CORRECT_VECTOR_ABI: &'static [(u64, &'static str)] = &[(128, "neon")];
const S390X_FEATURES_FOR_CORRECT_VECTOR_ABI: &'static [(u64, &'static str)] = &[(128, "vector")];

impl super::spec::Target {
pub fn supported_target_features(
Expand Down Expand Up @@ -555,6 +556,7 @@ impl super::spec::Target {
match &*self.arch {
"x86" | "x86_64" => Some(X86_FEATURES_FOR_CORRECT_VECTOR_ABI),
"aarch64" => Some(AARCH64_FEATURES_FOR_CORRECT_VECTOR_ABI),
"s390x" => Some(S390X_FEATURES_FOR_CORRECT_VECTOR_ABI),
// FIXME: add support for non-tier1 architectures
_ => None,
}
Expand Down
110 changes: 108 additions & 2 deletions tests/assembly/asm/s390x-types.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
//@ revisions: s390x
//@ revisions: s390x s390x_vector
//@ assembly-output: emit-asm
//@[s390x] compile-flags: --target s390x-unknown-linux-gnu
//@[s390x] needs-llvm-components: systemz
//@[s390x_vector] compile-flags: --target s390x-unknown-linux-gnu -C target-feature=+vector
//@[s390x_vector] needs-llvm-components: systemz
//@ compile-flags: -Zmerge-functions=disabled

#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch)]
Expand All @@ -27,8 +29,23 @@ trait Sized {}
#[lang = "copy"]
trait Copy {}

impl<T: Copy, const N: usize> Copy for [T; N] {}

type ptr = *const i32;

#[repr(simd)]
pub struct i8x16([i8; 16]);
#[repr(simd)]
pub struct i16x8([i16; 8]);
#[repr(simd)]
pub struct i32x4([i32; 4]);
#[repr(simd)]
pub struct i64x2([i64; 2]);
#[repr(simd)]
pub struct f32x4([f32; 4]);
#[repr(simd)]
pub struct f64x2([f64; 2]);

impl Copy for i8 {}
impl Copy for u8 {}
impl Copy for i16 {}
Expand All @@ -37,6 +54,12 @@ impl Copy for i64 {}
impl Copy for f32 {}
impl Copy for f64 {}
impl Copy for ptr {}
impl Copy for i8x16 {}
impl Copy for i16x8 {}
impl Copy for i32x4 {}
impl Copy for i64x2 {}
impl Copy for f32x4 {}
impl Copy for f64x2 {}

extern "C" {
fn extern_func();
Expand Down Expand Up @@ -65,7 +88,6 @@ macro_rules! check_reg { ($func:ident, $ty:ty, $reg:tt, $mov:literal) => {
// CHECK: #APP
// CHECK: brasl %r14, extern_func
// CHECK: #NO_APP
#[cfg(s390x)]
#[no_mangle]
pub unsafe fn sym_fn_32() {
asm!("brasl %r14, {}", sym extern_func);
Expand Down Expand Up @@ -146,6 +168,48 @@ check!(reg_f64, f64, freg, "ldr");
// CHECK: #NO_APP
check!(reg_ptr, ptr, reg, "lgr");

// s390x_vector-LABEL: vreg_i8x16:
// s390x_vector: #APP
// s390x_vector: vlr %v{{[0-9]+}}, %v{{[0-9]+}}
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check!(vreg_i8x16, i8x16, vreg, "vlr");

// s390x_vector-LABEL: vreg_i16x8:
// s390x_vector: #APP
// s390x_vector: vlr %v{{[0-9]+}}, %v{{[0-9]+}}
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check!(vreg_i16x8, i16x8, vreg, "vlr");

// s390x_vector-LABEL: vreg_i32x4:
// s390x_vector: #APP
// s390x_vector: vlr %v{{[0-9]+}}, %v{{[0-9]+}}
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check!(vreg_i32x4, i32x4, vreg, "vlr");

// s390x_vector-LABEL: vreg_i64x2:
// s390x_vector: #APP
// s390x_vector: vlr %v{{[0-9]+}}, %v{{[0-9]+}}
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check!(vreg_i64x2, i64x2, vreg, "vlr");

// s390x_vector-LABEL: vreg_f32x4:
// s390x_vector: #APP
// s390x_vector: vlr %v{{[0-9]+}}, %v{{[0-9]+}}
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check!(vreg_f32x4, f32x4, vreg, "vlr");

// s390x_vector-LABEL: vreg_f64x2:
// s390x_vector: #APP
// s390x_vector: vlr %v{{[0-9]+}}, %v{{[0-9]+}}
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check!(vreg_f64x2, f64x2, vreg, "vlr");

// CHECK-LABEL: r0_i8:
// CHECK: #APP
// CHECK: lr %r0, %r0
Expand Down Expand Up @@ -181,3 +245,45 @@ check_reg!(f0_f32, f32, "f0", "ler");
// CHECK: ldr %f0, %f0
// CHECK: #NO_APP
check_reg!(f0_f64, f64, "f0", "ldr");

// s390x_vector-LABEL: v0_i8x16:
// s390x_vector: #APP
// s390x_vector: vlr %v0, %v0
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check_reg!(v0_i8x16, i8x16, "v0", "vlr");

// s390x_vector-LABEL: v0_i16x8:
// s390x_vector: #APP
// s390x_vector: vlr %v0, %v0
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check_reg!(v0_i16x8, i16x8, "v0", "vlr");

// s390x_vector-LABEL: v0_i32x4:
// s390x_vector: #APP
// s390x_vector: vlr %v0, %v0
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check_reg!(v0_i32x4, i32x4, "v0", "vlr");

// s390x_vector-LABEL: v0_i64x2:
// s390x_vector: #APP
// s390x_vector: vlr %v0, %v0
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check_reg!(v0_i64x2, i64x2, "v0", "vlr");

// s390x_vector-LABEL: v0_f32x4:
// s390x_vector: #APP
// s390x_vector: vlr %v0, %v0
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check_reg!(v0_f32x4, f32x4, "v0", "vlr");

// s390x_vector-LABEL: v0_f64x2:
// s390x_vector: #APP
// s390x_vector: vlr %v0, %v0
// s390x_vector: #NO_APP
#[cfg(s390x_vector)]
check_reg!(v0_f64x2, f64x2, "v0", "vlr");
Loading
Loading