Skip to content

Commit 6491107

Browse files
Merge pull request #395 from rust-lang/swizzle_dyn_fix
Fix swizzle_dyn
2 parents e06e54c + eee4f1d commit 6491107

File tree

1 file changed

+12
-32
lines changed

1 file changed

+12
-32
lines changed

crates/core_simd/src/swizzle_dyn.rs

+12 -32
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ where
4444
))]
4545
8 => transize(vtbl1_u8, self, idxs),
4646
#[cfg(target_feature = "ssse3")]
47-
16 => transize(x86::_mm_shuffle_epi8, self, idxs),
47+
16 => transize(x86::_mm_shuffle_epi8, self, zeroing_idxs(idxs)),
4848
#[cfg(target_feature = "simd128")]
4949
16 => transize(wasm::i8x16_swizzle, self, idxs),
5050
#[cfg(all(
@@ -54,9 +54,9 @@ where
5454
))]
5555
16 => transize(vqtbl1q_u8, self, idxs),
5656
#[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
57-
32 => transize_raw(avx2_pshufb, self, idxs),
57+
32 => transize(avx2_pshufb, self, idxs),
5858
#[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
59-
32 => transize(x86::_mm256_permutexvar_epi8, self, idxs),
59+
32 => transize(x86::_mm256_permutexvar_epi8, zeroing_idxs(idxs), self),
6060
// Notable absence: avx512bw shuffle
6161
// If avx512bw is available, odds of avx512vbmi are good
6262
// FIXME: initial AVX512VBMI variant didn't actually pass muster
@@ -129,45 +129,25 @@ unsafe fn avx2_pshufb(bytes: Simd<u8, 32>, idxs: Simd<u8, 32>) -> Simd<u8, 32> {
129129
#[inline(always)]
130130
unsafe fn transize<T, const N: usize>(
131131
f: unsafe fn(T, T) -> T,
132-
bytes: Simd<u8, N>,
133-
idxs: Simd<u8, N>,
132+
a: Simd<u8, N>,
133+
b: Simd<u8, N>,
134134
) -> Simd<u8, N>
135135
where
136136
LaneCount<N>: SupportedLaneCount,
137137
{
138-
let idxs = zeroing_idxs(idxs);
139138
// SAFETY: Same obligation to use this function as to use mem::transmute_copy.
140-
unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) }
139+
unsafe { mem::transmute_copy(&f(mem::transmute_copy(&a), mem::transmute_copy(&b))) }
141140
}
142141

143-
/// Make indices that yield 0 for this architecture
142+
/// Make indices that yield 0 for x86
143+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
144+
#[allow(unused)]
144145
#[inline(always)]
145146
fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N>
146147
where
147148
LaneCount<N>: SupportedLaneCount,
148149
{
149-
// On x86, make sure the top bit is set.
150-
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
151-
let idxs = {
152-
use crate::simd::cmp::SimdPartialOrd;
153-
idxs.simd_lt(Simd::splat(N as u8))
154-
.select(idxs, Simd::splat(u8::MAX))
155-
};
156-
// Simply do nothing on most architectures.
157-
idxs
158-
}
159-
160-
/// As transize but no implicit call to `zeroing_idxs`.
161-
#[allow(dead_code)]
162-
#[inline(always)]
163-
unsafe fn transize_raw<T, const N: usize>(
164-
f: unsafe fn(T, T) -> T,
165-
bytes: Simd<u8, N>,
166-
idxs: Simd<u8, N>,
167-
) -> Simd<u8, N>
168-
where
169-
LaneCount<N>: SupportedLaneCount,
170-
{
171-
// SAFETY: Same obligation to use this function as to use mem::transmute_copy.
172-
unsafe { mem::transmute_copy(&f(mem::transmute_copy(&bytes), mem::transmute_copy(&idxs))) }
150+
use crate::simd::cmp::SimdPartialOrd;
151+
idxs.simd_lt(Simd::splat(N as u8))
152+
.select(idxs, Simd::splat(u8::MAX))
173153
}

0 commit comments

Comments
 (0)