From d7b63a38043364ff003031efc35143e6f834cf7c Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Sun, 12 Jan 2025 15:43:48 +0100
Subject: [PATCH] x86: use SSE registers to return float values

---
 compiler/rustc_target/src/callconv/mod.rs |   9 +-
 compiler/rustc_target/src/callconv/x86.rs |  12 ++-
 tests/assembly/x86-return-float.rs        | 111 ++++++++++++----
 3 files changed, 74 insertions(+), 58 deletions(-)

diff --git a/compiler/rustc_target/src/callconv/mod.rs b/compiler/rustc_target/src/callconv/mod.rs
index a94b35f9966d4..cd60ea0718376 100644
--- a/compiler/rustc_target/src/callconv/mod.rs
+++ b/compiler/rustc_target/src/callconv/mod.rs
@@ -389,6 +389,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
     /// Pass this argument directly instead. Should NOT be used!
     /// Only exists because of past ABI mistakes that will take time to fix
     /// (see ).
+    #[track_caller]
     pub fn make_direct_deprecated(&mut self) {
         match self.mode {
             PassMode::Indirect { .. } => {
@@ -401,6 +402,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
 
     /// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
     /// This is valid for both sized and unsized arguments.
+    #[track_caller]
     pub fn make_indirect(&mut self) {
         match self.mode {
             PassMode::Direct(_) | PassMode::Pair(_, _) => {
@@ -415,6 +417,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
 
     /// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
     /// ZSTs indirectly.
+    #[track_caller]
     pub fn make_indirect_from_ignore(&mut self) {
         match self.mode {
             PassMode::Ignore => {
@@ -773,9 +776,9 @@ impl<'a, Ty> FnAbi<'a, Ty> {
 
             if arg_idx.is_none()
                 && arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2
-                && !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
+                && arg.layout.is_aggregate()
             {
-                // Return values larger than 2 registers using a return area
+                // Return aggregate values larger than 2 registers using a return area
                 // pointer. LLVM and Cranelift disagree about how to return
                 // values that don't fit in the registers designated for return
                 // values. LLVM will force the entire return value to be passed
@@ -813,8 +816,6 @@ impl<'a, Ty> FnAbi<'a, Ty> {
                 // rustc_target already ensure any return value which doesn't
                 // fit in the available amount of return registers is passed in
                 // the right way for the current target.
-                // The adjustment is also not necessary nor desired for types with
-                // a vector representation; those are handled below.
                 arg.make_indirect();
                 continue;
             }
diff --git a/compiler/rustc_target/src/callconv/x86.rs b/compiler/rustc_target/src/callconv/x86.rs
index cd8465c09ca98..805c4e1fee33e 100644
--- a/compiler/rustc_target/src/callconv/x86.rs
+++ b/compiler/rustc_target/src/callconv/x86.rs
@@ -2,8 +2,8 @@ use crate::abi::call::{ArgAttribute, FnAbi, PassMode, Reg, RegKind};
 use crate::abi::{
     AddressSpace, Align, BackendRepr, Float, HasDataLayout, Pointer, TyAbiInterface, TyAndLayout,
 };
-use crate::spec::HasTargetSpec;
 use crate::spec::abi::Abi as SpecAbi;
+use crate::spec::{HasTargetSpec, RustAbi};
 
 #[derive(PartialEq)]
 pub(crate) enum Flavor {
@@ -234,8 +234,14 @@ where
         _ => false, // anyway not passed via registers on x86
     };
     if has_float {
-        if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
-            // Same size or smaller than pointer, return in a register.
+        if cx.target_spec().rust_abi == Some(RustAbi::X86Sse2)
+            && fn_abi.ret.layout.backend_repr.is_scalar()
+            && fn_abi.ret.layout.size.bits() <= 128
+        {
+            // This is a single scalar that fits into an SSE register.
+            fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
+        } else if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
+            // Same size or smaller than pointer, return in an integer register.
             fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
         } else {
             // Larger than a pointer, return indirectly.
diff --git a/tests/assembly/x86-return-float.rs b/tests/assembly/x86-return-float.rs
index acd1af8d38af1..0802116bf61d1 100644
--- a/tests/assembly/x86-return-float.rs
+++ b/tests/assembly/x86-return-float.rs
@@ -1,19 +1,31 @@
 //@ assembly-output: emit-asm
-//@ only-x86
-// FIXME(#114479): LLVM miscompiles loading and storing `f32` and `f64` when SSE is disabled.
-// There's no compiletest directive to ignore a test on i586 only, so just always explicitly enable
-// SSE2.
-// Use the same target CPU as `i686` so that LLVM orders the instructions in the same order.
-//@ compile-flags: -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
+//@ revisions: sse nosse
+//@[sse] compile-flags: --target i686-unknown-linux-gnu
+//@[sse] needs-llvm-components: x86
+// We make SSE available but don't use it for the ABI.
+//@[nosse] compile-flags: --target i586-unknown-linux-gnu -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
+//@[nosse] needs-llvm-components: x86
+
 // Force frame pointers to make ASM more consistent between targets
 //@ compile-flags: -O -C force-frame-pointers
 //@ filecheck-flags: --implicit-check-not fld --implicit-check-not fst
-//@ revisions: normal win
-//@[normal] ignore-windows
-//@[win] only-windows
-#![crate_type = "lib"]
 #![feature(f16, f128)]
+#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
+#![no_core]
+#![crate_type = "lib"]
+
+#[lang = "sized"]
+trait Sized {}
+
+#[lang = "copy"]
+trait Copy {}
+
+impl Copy for f16 {}
+impl Copy for f32 {}
+impl Copy for f64 {}
+impl Copy for f128 {}
+impl Copy for usize {}
 
 // Tests that returning `f32` and `f64` with the "Rust" ABI on 32-bit x86 doesn't use the x87
 // floating point stack, as loading and storing `f32`s and `f64`s to and from the x87 stack quietens
 // signalling NaNs.
@@ -24,7 +36,8 @@
 // CHECK-LABEL: return_f32:
 #[no_mangle]
 pub fn return_f32(x: f32) -> f32 {
-    // CHECK: movl {{.*}}(%ebp), %eax
+    // sse: movss {{.*}}(%ebp), %xmm0
+    // nosse: movl {{.*}}(%ebp), %eax
     // CHECK-NOT: ax
     // CHECK: retl
     x
@@ -33,9 +46,11 @@ pub fn return_f32(x: f32) -> f32 {
 // CHECK-LABEL: return_f64:
 #[no_mangle]
 pub fn return_f64(x: f64) -> f64 {
-    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
-    // CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
-    // CHECK-NEXT: movsd %[[VAL]], (%[[PTR]])
+    // nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // nosse-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
+    // nosse-NEXT: movsd %[[VAL]], (%[[PTR]])
+    // sse: movsd {{.*}}(%ebp), %xmm0
+    // sse-NOT: ax
     // CHECK: retl
     x
 }
@@ -148,7 +163,8 @@ pub unsafe fn call_f32(x: &mut f32) {
     }
     // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
     // CHECK: calll {{()|_}}get_f32
-    // CHECK-NEXT: movl %eax, (%[[PTR]])
+    // sse-NEXT: movss %xmm0, (%[[PTR]])
+    // nosse-NEXT: movl %eax, (%[[PTR]])
     *x = get_f32();
 }
 
@@ -160,8 +176,9 @@ pub unsafe fn call_f64(x: &mut f64) {
     }
     // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
     // CHECK: calll {{()|_}}get_f64
-    // CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
-    // CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
+    // sse: movlps %xmm0, (%[[PTR]])
+    // nosse: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
+    // nosse-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
     *x = get_f64();
 }
 
@@ -190,10 +207,8 @@ pub unsafe fn call_f64_f64(x: &mut (f64, f64)) {
     }
     // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
     // CHECK: calll {{()|_}}get_f64_f64
-    // normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
-    // normal-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
-    // win: movsd (%esp), %[[VAL1:.*]]
-    // win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
+    // CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
     // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
     // CHECK-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
     *x = get_f64_f64();
@@ -207,13 +222,10 @@ pub unsafe fn call_f32_f64(x: &mut (f32, f64)) {
     }
     // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
     // CHECK: calll {{()|_}}get_f32_f64
-    // normal: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
-    // normal-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
-    // win: movss (%esp), %[[VAL1:.*]]
-    // win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
+    // CHECK: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
     // CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
-    // normal-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
-    // win-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
+    // CHECK-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
     *x = get_f32_f64();
 }
 
@@ -225,10 +237,8 @@ pub unsafe fn call_f64_f32(x: &mut (f64, f32)) {
     }
     // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
     // CHECK: calll {{()|_}}get_f64_f32
-    // normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
-    // normal-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
-    // win: movsd (%esp), %[[VAL1:.*]]
-    // win-NEXT: movss 8(%esp), %[[VAL2:.*]]
+    // CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
     // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
     // CHECK-NEXT: movss %[[VAL2]], 8(%[[PTR]])
     *x = get_f64_f32();
@@ -257,10 +267,8 @@ pub unsafe fn call_f64_other(x: &mut (f64, usize)) {
     }
     // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
     // CHECK: calll {{()|_}}get_f64_other
-    // normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
-    // normal-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
-    // win: movsd (%esp), %[[VAL1:.*]]
-    // win-NEXT: movl 8(%esp), %[[VAL2:.*]]
+    // CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
     // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
     // CHECK-NEXT: movl %[[VAL2]], 8(%[[PTR]])
     *x = get_f64_other();
@@ -289,13 +297,10 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
     }
     // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
     // CHECK: calll {{()|_}}get_other_f64
-    // normal: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
-    // normal-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
-    // win: movl (%esp), %[[VAL1:.*]]
-    // win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
     // CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
-    // normal-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
-    // win-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
+    // CHECK-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
     *x = get_other_f64();
 }
 
@@ -307,7 +312,8 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
 pub fn return_f16(x: f16) -> f16 {
     // CHECK: pushl %ebp
     // CHECK: movl %esp, %ebp
-    // CHECK: movzwl 8(%ebp), %eax
+    // nosse: movzwl 8(%ebp), %eax
+    // sse: pinsrw $0, 8(%ebp), %xmm0
     // CHECK: popl %ebp
     // CHECK: retl
     x
@@ -316,15 +322,18 @@ pub fn return_f16(x: f16) -> f16 {
 // CHECK-LABEL: return_f128:
 #[no_mangle]
 pub fn return_f128(x: f128) -> f128 {
-    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
-    // CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
-    // CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
-    // CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
-    // CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
-    // CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
-    // CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
-    // CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
-    // CHECK-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
+    // CHECK: pushl %ebp
+    // sse: movaps [[#%d,OFFSET:]](%ebp), %xmm0
+    // nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // nosse-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // nosse-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // nosse-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
+    // nosse-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
+    // nosse-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
+    // nosse-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
+    // nosse-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
+    // nosse-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
+    // CHECK: popl %ebp
     // CHECK: retl
     x
 }
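
The return-value rule that the x86.rs hunk above implements can be restated as a small standalone model. This is an illustrative sketch only: `RetKind`, `classify_float_ret`, and `pointer_bits` are names invented here, `sse_abi` stands in for `cx.target_spec().rust_abi == Some(RustAbi::X86Sse2)`, and the real code mutates `fn_abi.ret` via `cast_to`/`make_indirect` rather than returning an enum. It covers only the `has_float` path, i.e. return types containing a float.

// Illustrative model of the patched x86-32 "Rust" ABI float-return rule.
// All names are hypothetical; the real logic lives in callconv/x86.rs above.
#[derive(Debug, PartialEq)]
enum RetKind {
    SseReg { bits: u64 }, // returned in %xmm0 (RegKind::Vector cast)
    IntReg { bits: u64 }, // returned in %eax (RegKind::Integer cast)
    Indirect,             // returned through a hidden return-area pointer
}

// `sse_abi`: target opted into the SSE2 Rust ABI (the `sse` test revision).
// `ret_is_scalar`: the return value is a single scalar (f16/f32/f64/f128).
// `size_bits`: size of the return type in bits.
fn classify_float_ret(sse_abi: bool, ret_is_scalar: bool, size_bits: u64) -> RetKind {
    let pointer_bits = 32; // i686 data pointers are 32 bits wide
    if sse_abi && ret_is_scalar && size_bits <= 128 {
        // A single scalar that fits into an SSE register.
        RetKind::SseReg { bits: size_bits }
    } else if size_bits <= pointer_bits {
        // Same size or smaller than a pointer: an integer register suffices.
        RetKind::IntReg { bits: size_bits }
    } else {
        // Larger than a pointer: return indirectly.
        RetKind::Indirect
    }
}

fn main() {
    // f64 under the SSE ABI is returned in %xmm0 (the `sse` checks above).
    assert_eq!(classify_float_ret(true, true, 64), RetKind::SseReg { bits: 64 });
    // Without the SSE ABI, f64 is too large for one register and goes indirect.
    assert_eq!(classify_float_ret(false, true, 64), RetKind::Indirect);
    // f32 without the SSE ABI still fits in %eax (the `nosse` checks above).
    assert_eq!(classify_float_ret(false, true, 32), RetKind::IntReg { bits: 32 });
}

These outcomes are exactly what the `sse` and `nosse` revisions of the assembly test above check for.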