Skip to content

Commit 20e5927

Browse files
committed
Gate mul_add() behind FMA3 check
`mul_add()` is crazy slow without FMA3, and the compiler won't automatically switch between the fused and unfused forms because that *technically* changes the results, and that's a sin in the Rust bible
1 parent f51e096 commit 20e5927

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

src/lib.rs

+13-4
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,26 @@ trait DT:
3333
Sized + Copy + Add<Output = Self> + Div<Output = Self> + Mul<Output = Self> + Sub<Output = Self> + Rem<Output = Self>
3434
{
3535
fn f32(b: f32) -> Self;
36-
fn fma(self, mul: Self, add: Self) -> Self;
36+
fn _fma(self, mul: Self, add: Self) -> Self;
3737
fn powf(self, b: Self) -> Self;
3838
fn branch<F: FnOnce() -> Self, G: FnOnce() -> Self>(self, b: Self, cmp: Cmp, x: F, y: G) -> Self;
39+
40+
/// Computes `self * mul + add`, picking the implementation at compile time.
///
/// When the crate is built with the `fma` target feature enabled, this
/// delegates to the implementor's `_fma`; otherwise it evaluates the product
/// and the sum with the ordinary `Mul` and `Add` operators. The two paths may
/// differ in rounding for implementors whose `_fma` is a true fused operation.
fn fma(self, mul: Self, add: Self) -> Self {
41+
// Only the `fma` target-feature name is checked here; whether other
// (non-x86) architectures need a different feature name is still open.
42+
// `cfg!` expands to a compile-time boolean, so the branch is fixed per
// build rather than detected at run time.
if cfg!(target_feature = "fma") {
43+
self._fma(mul, add) // crazy slow without FMA3
44+
} else {
45+
// Fallback: separate multiply and add.
self * mul + add
46+
}
47+
}
3948
}
4049

4150
impl DT for f32 {
4251
/// Converts an `f32` into `Self`; for the `f32` implementation this is the
/// identity and returns `b` unchanged.
fn f32(b: f32) -> Self {
4352
b
4453
}
4554

46-
fn fma(self, mul: Self, add: Self) -> Self {
55+
/// Raw multiply-add for `f32`: forwards to `f32::mul_add`, yielding
/// `self * mul + add`. This performs no target-feature check of its own.
fn _fma(self, mul: Self, add: Self) -> Self {
4756
self.mul_add(mul, add)
4857
}
4958

@@ -70,7 +79,7 @@ impl DT for f64 {
7079
b.into()
7180
}
7281

73-
fn fma(self, mul: Self, add: Self) -> Self {
82+
/// Raw multiply-add for `f64`: forwards to `f64::mul_add`, yielding
/// `self * mul + add`. This performs no target-feature check of its own.
fn _fma(self, mul: Self, add: Self) -> Self {
7483
self.mul_add(mul, add)
7584
}
7685

@@ -101,7 +110,7 @@ where
101110
Self::splat(object)
102111
}
103112

104-
fn fma(self, mul: Self, add: Self) -> Self {
113+
/// Raw multiply-add: forwards to the implementing type's own `mul_add`.
/// NOTE(review): the impl header is not visible in this hunk; presumably a
/// SIMD vector type, since a sibling method calls `Self::splat` — confirm.
fn _fma(self, mul: Self, add: Self) -> Self {
105114
self.mul_add(mul, add)
106115
}
107116

0 commit comments

Comments
 (0)