Skip to content

Commit e96880b

Browse files
committed
DT with only f64
Looks clean, though I might want to support slices and arrays too. `.into()` won't work for those, so this may need a new trait.
1 parent ad60e5d commit e96880b

File tree

4 files changed

+55
-187
lines changed

4 files changed

+55
-187
lines changed

Cargo.toml

-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ publish = true
1414
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1515

1616
[features]
17-
nightly = []
18-
default = ["nightly"]
1917

2018
[dependencies]
2119

benches/conversions.rs

-23
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
#![feature(portable_simd)]
21
use criterion::{black_box, criterion_group, criterion_main, Criterion};
32
use colcon::{Space, convert_space};
4-
//use std::simd::prelude::*;
53

64
fn pixels() -> Box<[f32]> {
75
let size = 512;
@@ -26,23 +24,6 @@ pub fn conversions(c: &mut Criterion) {
2624
black_box(pixels.clone().chunks_exact_mut(3).for_each(|pixel| colcon::lrgb_to_xyz(pixel.try_into().unwrap())));
2725
} ));
2826

29-
c.bench_function("lrgb_to_xyz_simd", |b| b.iter(|| {
30-
black_box({
31-
let mut pixels_simd = pixels.clone();
32-
let mut unwoven = colcon::unweave_simd::<3, 8>(&pixels_simd);
33-
34-
let [mut rs, mut gs, mut bs] = unwoven;
35-
36-
for (r, (g, b)) in rs.1.iter_mut().zip(gs.1.iter_mut().zip(bs.1.iter_mut())) {
37-
let mut arr = [*r, *g, *b];
38-
colcon::lrgb_to_xyz(&mut arr);
39-
}
40-
41-
//pixels_simd = colcon::weave(unwoven);
42-
43-
});
44-
} ));
45-
4627
c.bench_function("xyz_to_cielab", |b| b.iter(|| {
4728
black_box(pixels.clone().chunks_exact_mut(3).for_each(|pixel| colcon::xyz_to_cielab(pixel.try_into().unwrap())));
4829
} ));
@@ -95,10 +76,6 @@ pub fn conversions(c: &mut Criterion) {
9576
black_box(pixels.clone().iter_mut().for_each(|n| *n = colcon::srgb_eotf(*n)));
9677
} ));
9778

98-
c.bench_function("srgb_eotf_simd", |b| b.iter(|| {
99-
black_box(pixels.clone().as_simd_mut::<32>().1.iter_mut().for_each(|n| *n = colcon::srgb_eotf(*n)));
100-
} ));
101-
10279
c.bench_function("srgb_eotf_inverse", |b| b.iter(|| {
10380
black_box(pixels.clone().iter_mut().for_each(|n| *n = colcon::srgb_eotf_inverse(*n)));
10481
} ));

scripts/test_ctypes.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
# up
2020
colcon.srgb_to_hsv.argtypes = [cpixel]
2121
colcon.srgb_to_lrgb.argtypes = [cpixel]
22-
colcon.lrgb_to_xyz.argtypes = [cpixel]
23-
colcon.xyz_to_lab.argtypes = [cpixel]
22+
colcon.lrgb_to_xyz_f32.argtypes = [cpixel]
23+
colcon.xyz_to_cielab.argtypes = [cpixel]
2424
colcon.xyz_to_oklab.argtypes = [cpixel]
2525
colcon.xyz_to_jzazbz.argtypes = [cpixel]
2626
colcon.lab_to_lch.argtypes = [cpixel]
@@ -29,14 +29,14 @@
2929
colcon.lch_to_lab.argtypes = [cpixel]
3030
colcon.jzazbz_to_xyz.argtypes = [cpixel]
3131
colcon.oklab_to_xyz.argtypes = [cpixel]
32-
colcon.lab_to_xyz.argtypes = [cpixel]
32+
colcon.cielab_to_xyz.argtypes = [cpixel]
3333
colcon.xyz_to_lrgb.argtypes = [cpixel]
3434
colcon.lrgb_to_srgb.argtypes = [cpixel]
3535
colcon.srgb_to_hsv.argtypes = [cpixel]
3636

3737
# extra
38-
colcon.srgb_eotf.argtypes = [ctypes.c_float]
39-
colcon.srgb_eotf.restype = ctypes.c_float
38+
colcon.srgb_eotf_f32.argtypes = [ctypes.c_float]
39+
colcon.srgb_eotf_f32.restype = ctypes.c_float
4040
colcon.srgb_eotf_inverse.argtypes = [ctypes.c_float]
4141
colcon.srgb_eotf_inverse.restype = ctypes.c_float
4242
colcon.hk_high2023.argtypes = [cpixel]
@@ -68,11 +68,11 @@ def pixcmp(a, b):
6868
pixcmp(list(pix), LRGB)
6969

7070
pix = cpixel(*LRGB)
71-
colcon.lrgb_to_xyz(pix)
71+
colcon.lrgb_to_xyz_f32(pix)
7272
pixcmp(list(pix), XYZ)
7373

7474
pix = cpixel(*XYZ)
75-
colcon.xyz_to_lab(pix)
75+
colcon.xyz_to_cielab(pix)
7676
pixcmp(list(pix), LAB)
7777

7878
pix = cpixel(*XYZ)
@@ -93,7 +93,7 @@ def pixcmp(a, b):
9393
pixcmp(list(pix), LAB)
9494

9595
pix = cpixel(*LAB)
96-
colcon.lab_to_xyz(pix)
96+
colcon.cielab_to_xyz(pix)
9797
pixcmp(list(pix), XYZ)
9898

9999
pix = cpixel(*JZAZBZ)

src/lib.rs

+47-154
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
#![cfg_attr(feature = "nightly", feature(portable_simd))]
2-
#![cfg_attr(feature = "nightly", feature(slice_as_chunks))]
1+
#![allow(private_bounds)]
32
#![warn(missing_docs)]
43

54
//! Simple colorspace conversions in pure Rust.
@@ -10,33 +9,27 @@
109
//!
1110
//! This crate references CIE Standard Illuminant D65 for functions to/from CIE XYZ
1211
12+
use core::cmp::PartialOrd;
1313
use core::ffi::{c_char, CStr};
1414
use core::ops::{Add, Div, Mul, Rem, Sub};
1515

16-
#[cfg(feature = "nightly")]
17-
use std::simd::prelude::*;
18-
19-
#[cfg(feature = "nightly")]
20-
use std::simd::{LaneCount, StdFloat, SupportedLaneCount};
21-
2216
fn spowf(n: f32, power: f32) -> f32 {
2317
n.abs().powf(power).copysign(n)
2418
}
2519

26-
enum Cmp {
27-
Gt,
28-
Lt,
29-
Ge,
30-
Le,
31-
}
32-
3320
trait DT:
34-
Sized + Copy + Add<Output = Self> + Div<Output = Self> + Mul<Output = Self> + Sub<Output = Self> + Rem<Output = Self>
21+
Sized
22+
+ Copy
23+
+ Add<Output = Self>
24+
+ Div<Output = Self>
25+
+ Mul<Output = Self>
26+
+ Sub<Output = Self>
27+
+ Rem<Output = Self>
28+
+ PartialOrd
29+
+ From<f32>
3530
{
36-
fn f32(b: f32) -> Self;
3731
fn _fma(self, mul: Self, add: Self) -> Self;
38-
fn powf(self, b: Self) -> Self;
39-
fn branch<F: FnOnce() -> Self, G: FnOnce() -> Self>(self, b: Self, cmp: Cmp, x: F, y: G) -> Self;
32+
fn powf(self, rhs: Self) -> Self;
4033

4134
fn fma(self, mul: Self, add: Self) -> Self {
4235
// other non-x86 names?
@@ -48,127 +41,22 @@ trait DT:
4841
}
4942
}
5043

51-
impl DT for f32 {
52-
fn f32(b: f32) -> Self {
53-
b
54-
}
55-
56-
fn _fma(self, mul: Self, add: Self) -> Self {
57-
self.mul_add(mul, add)
58-
}
59-
60-
fn powf(self, b: Self) -> Self {
61-
self.powf(b)
62-
}
63-
64-
fn branch<F: FnOnce() -> Self, G: FnOnce() -> Self>(self, b: Self, cmp: Cmp, x: F, y: G) -> Self {
65-
if match cmp {
66-
Cmp::Gt => self > b,
67-
Cmp::Lt => self < b,
68-
Cmp::Ge => self >= b,
69-
Cmp::Le => self <= b,
70-
} {
71-
x()
72-
} else {
73-
y()
74-
}
75-
}
76-
}
77-
78-
impl DT for f64 {
79-
fn f32(b: f32) -> Self {
80-
b.into()
81-
}
82-
83-
fn _fma(self, mul: Self, add: Self) -> Self {
84-
self.mul_add(mul, add)
85-
}
86-
87-
fn powf(self, b: Self) -> Self {
88-
self.powf(b)
89-
}
90-
91-
fn branch<F: FnOnce() -> Self, G: FnOnce() -> Self>(self, b: Self, cmp: Cmp, x: F, y: G) -> Self {
92-
if match cmp {
93-
Cmp::Gt => self > b,
94-
Cmp::Lt => self < b,
95-
Cmp::Ge => self >= b,
96-
Cmp::Le => self <= b,
97-
} {
98-
x()
99-
} else {
100-
y()
101-
}
102-
}
103-
}
104-
105-
#[cfg(feature = "nightly")]
106-
impl<const N: usize> DT for Simd<f32, N>
107-
where
108-
LaneCount<N>: SupportedLaneCount,
109-
{
110-
fn f32(object: f32) -> Self {
111-
Self::splat(object)
112-
}
113-
114-
fn _fma(self, mul: Self, add: Self) -> Self {
115-
self.mul_add(mul, add)
116-
}
117-
118-
fn powf(mut self, b: Self) -> Self {
119-
self.as_mut_array()
120-
.iter_mut()
121-
.zip(b.as_array().iter())
122-
.for_each(|(a, b)| *a = a.powf(*b));
123-
self
124-
}
125-
126-
fn branch<F: FnOnce() -> Self, G: FnOnce() -> Self>(self, b: Self, cmp: Cmp, x: F, y: G) -> Self {
127-
match cmp {
128-
Cmp::Gt => self.simd_gt(b),
129-
Cmp::Lt => self.simd_lt(b),
130-
Cmp::Ge => self.simd_ge(b),
131-
Cmp::Le => self.simd_le(b),
132-
}
133-
.select(x(), y())
134-
}
135-
}
136-
137-
#[cfg(feature = "nightly")]
138-
/// Create an array of separate channel buffers from a single interwoven buffer.
139-
/// Copies the data.
140-
pub fn unweave_simd<'a, const C: usize, const L: usize>(slice: &[f32]) -> [(Box<[f32]>, Box<[Simd<f32, L>]>); C]
141-
where
142-
LaneCount<L>: SupportedLaneCount,
143-
{
144-
let len = slice.len() / (C * L);
145-
let mut result: [Vec<Simd<f32, L>>; C] = (0..C)
146-
.map(|_| Vec::with_capacity(len))
147-
.collect::<Vec<Vec<_>>>()
148-
.try_into()
149-
.unwrap();
150-
151-
//let chunks = slice.as_chunks::<C>();
152-
//for chunk in chunks.0.
153-
//let mut remainders: [Box<[f32]>; C] = [Box::new([]), Box::new([]), Box::new([])];
154-
for chunk in slice.chunks(C * L) {
155-
if chunk.len() == C * L {
156-
for c in 0..C {
157-
result[c].push(Simd::from_slice(
158-
&(0..L).map(|l| chunk[c + l * c]).collect::<Vec<f32>>(),
159-
));
44+
macro_rules! impl_float {
45+
($type:ident) => {
46+
impl DT for $type {
47+
fn _fma(self, mul: Self, add: Self) -> Self {
48+
self.mul_add(mul, add)
49+
}
50+
fn powf(self, rhs: Self) -> Self {
51+
self.powf(rhs)
16052
}
16153
}
162-
}
163-
164-
result
165-
.into_iter()
166-
.map(|v| (Vec::new().into_boxed_slice(), v.into_boxed_slice()))
167-
.collect::<Vec<(Box<[f32]>, Box<[Simd<f32, L>]>)>>()
168-
.try_into()
169-
.unwrap()
54+
};
17055
}
17156

57+
impl_float!(f32);
58+
impl_float!(f64);
59+
17260
/// Create an array of separate channel buffers from a single interwoven buffer.
17361
/// Copies the data.
17462
pub fn unweave<const N: usize>(slice: &[f32]) -> [Box<[f32]>; N] {
@@ -336,9 +224,9 @@ fn matmul3t(pixel: [f32; 3], matrix: [[f32; 3]; 3]) -> [f32; 3] {
336224
/// Transposed 3 * 3x3 matrix multiply, ie matrix @ pixel
337225
fn matmul3<T: DT>(m: [[f32; 3]; 3], p: [T; 3]) -> [T; 3] {
338226
[
339-
p[0].fma(DT::f32(m[0][0]), p[1].fma(DT::f32(m[0][1]), p[2] * DT::f32(m[0][2]))),
340-
p[0].fma(DT::f32(m[1][0]), p[1].fma(DT::f32(m[1][1]), p[2] * DT::f32(m[1][2]))),
341-
p[0].fma(DT::f32(m[2][0]), p[1].fma(DT::f32(m[2][1]), p[2] * DT::f32(m[2][2]))),
227+
p[0].fma(m[0][0].into(), p[1].fma(m[0][1].into(), p[2] * m[0][2].into())),
228+
p[0].fma(m[1][0].into(), p[1].fma(m[1][1].into(), p[2] * m[1][2].into())),
229+
p[0].fma(m[2][0].into(), p[1].fma(m[2][1].into(), p[2] * m[2][2].into())),
342230
]
343231
}
344232
// ### MATRICES ### }}}
@@ -348,22 +236,22 @@ fn matmul3<T: DT>(m: [[f32; 3]; 3], p: [T; 3]) -> [T; 3] {
348236
/// sRGB Electro-Optical Transfer Function
349237
///
350238
/// <https://en.wikipedia.org/wiki/SRGB#Computing_the_transfer_function>
351-
//#[no_mangle]
352-
//pub fn srgb_eotf<T: DType>(n: T) -> T {
353-
// if n <= SRGBEOTF_CHI {
354-
// n / SRGBEOTF_PHI
355-
// } else {
356-
// ((n + SRGBEOTF_ALPHA) / (SRGBEOTF_ALPHA + 1.0)).powf(SRGBEOTF_GAMMA)
357-
// }
358-
//}
359-
360239
pub fn srgb_eotf<T: DT>(n: T) -> T {
361-
n.branch(
362-
DT::f32(SRGBEOTF_CHI),
363-
Cmp::Le,
364-
|| n / DT::f32(SRGBEOTF_PHI),
365-
|| ((n + DT::f32(SRGBEOTF_ALPHA)) / DT::f32(SRGBEOTF_ALPHA + 1.0)).powf(DT::f32(SRGBEOTF_GAMMA)),
366-
)
240+
if n <= SRGBEOTF_CHI.into() {
241+
n / SRGBEOTF_PHI.into()
242+
} else {
243+
((n + SRGBEOTF_ALPHA.into()) / (SRGBEOTF_ALPHA + 1.0).into()).powf(SRGBEOTF_GAMMA.into())
244+
}
245+
}
246+
247+
#[no_mangle]
248+
extern "C" fn srgb_eotf_f32(n: f32) -> f32 {
249+
srgb_eotf(n)
250+
}
251+
252+
#[no_mangle]
253+
extern "C" fn srgb_eotf_f64(n: f64) -> f64 {
254+
srgb_eotf(n)
367255
}
368256

369257
/// Inverse sRGB Electro-Optical Transfer Function
@@ -1085,6 +973,11 @@ extern "C" fn lrgb_to_xyz_f32(pixel: &mut [f32; 3]) {
1085973
lrgb_to_xyz(pixel)
1086974
}
1087975

976+
#[no_mangle]
977+
extern "C" fn lrgb_to_xyz_f64(pixel: &mut [f64; 3]) {
978+
lrgb_to_xyz(pixel)
979+
}
980+
1088981
/// Convert from CIE XYZ to CIE LAB.
1089982
///
1090983
/// <https://en.wikipedia.org/wiki/CIELAB_color_space#From_CIEXYZ_to_CIELAB>

0 commit comments

Comments
 (0)