Skip to content

Commit ad60e5d

Browse files
committed
Attempt proper SIMD unweave
Fail. 1000x slower
1 parent 20e5927 commit ad60e5d

File tree

3 files changed

+51
-2
lines changed

3 files changed

+51
-2
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ publish = true
1515

1616
[features]
1717
nightly = []
18-
default = []
18+
default = ["nightly"]
1919

2020
[dependencies]
2121

benches/conversions.rs

+14-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,20 @@ pub fn conversions(c: &mut Criterion) {
2727
} ));
2828

2929
c.bench_function("lrgb_to_xyz_simd", |b| b.iter(|| {
30-
black_box(pixels.clone().as_simd_mut::<32>().1.chunks_exact_mut(3).for_each(|pixel| colcon::lrgb_to_xyz(pixel.try_into().unwrap())));
30+
black_box({
31+
let mut pixels_simd = pixels.clone();
32+
let mut unwoven = colcon::unweave_simd::<3, 8>(&pixels_simd);
33+
34+
let [mut rs, mut gs, mut bs] = unwoven;
35+
36+
for (r, (g, b)) in rs.1.iter_mut().zip(gs.1.iter_mut().zip(bs.1.iter_mut())) {
37+
let mut arr = [*r, *g, *b];
38+
colcon::lrgb_to_xyz(&mut arr);
39+
}
40+
41+
//pixels_simd = colcon::weave(unwoven);
42+
43+
});
3144
} ));
3245

3346
c.bench_function("xyz_to_cielab", |b| b.iter(|| {

src/lib.rs

+36
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#![cfg_attr(feature = "nightly", feature(portable_simd))]
2+
#![cfg_attr(feature = "nightly", feature(slice_as_chunks))]
23
#![warn(missing_docs)]
34

45
//! Simple colorspace conversions in pure Rust.
@@ -133,6 +134,41 @@ where
133134
}
134135
}
135136

137+
#[cfg(feature = "nightly")]
/// Create an array of separate channel buffers from a single interwoven buffer.
/// Copies the data.
///
/// `C` is the number of interleaved channels and `L` is the SIMD lane count.
/// Element `c` of the returned array belongs to channel `c` and is a
/// `(scalar remainder, SIMD vectors)` pair:
///
/// * The input is consumed in blocks of `C * L` values (`L` whole pixels).
///   For every block, lane `l` of channel `c`'s vector is `block[c + l * C]`.
/// * Whole pixels left over after the last full block are de-interleaved into
///   the scalar remainder of each channel, in input order.
/// * A trailing partial pixel (fewer than `C` values) is discarded.
///
/// With `C == 0` the result is an empty array regardless of the input.
///
/// The unused lifetime parameter `'a` is kept so the generic signature stays
/// unchanged for existing callers.
pub fn unweave_simd<'a, const C: usize, const L: usize>(
    slice: &[f32],
) -> [(Box<[f32]>, Box<[Simd<f32, L>]>); C]
where
    LaneCount<L>: SupportedLaneCount,
{
    let stride = C * L;

    // `stride` is zero only when `C == 0`; avoid dividing by it.
    let (vector_count, tail_pixels) = if stride == 0 {
        (0, 0)
    } else {
        (slice.len() / stride, (slice.len() % stride) / C)
    };

    let mut channels: [(Vec<f32>, Vec<Simd<f32, L>>); C] = std::array::from_fn(|_| {
        (
            Vec::with_capacity(tail_pixels),
            Vec::with_capacity(vector_count),
        )
    });

    if stride != 0 {
        let blocks = slice.chunks_exact(stride);
        let tail = blocks.remainder();

        for block in blocks {
            for (c, (_, vectors)) in channels.iter_mut().enumerate() {
                // Samples of one channel are `C` apart in the interleaved data.
                // Gathering into a stack array avoids a heap allocation per vector.
                vectors.push(Simd::from_array(std::array::from_fn(|l| block[c + l * C])));
            }
        }

        // Pixels that do not fill a whole SIMD vector are kept as scalars.
        for pixel in tail.chunks_exact(C) {
            for ((scalars, _), &value) in channels.iter_mut().zip(pixel) {
                scalars.push(value);
            }
        }
    }

    channels.map(|(scalars, vectors)| (scalars.into_boxed_slice(), vectors.into_boxed_slice()))
}
171+
136172
/// Create an array of separate channel buffers from a single interwoven buffer.
137173
/// Copies the data.
138174
pub fn unweave<const N: usize>(slice: &[f32]) -> [Box<[f32]>; N] {

0 commit comments

Comments
 (0)