Skip to content

Commit 30ece8d

Browse files
committed
Make tiny-skia work on arm64
1 parent d27c67c commit 30ece8d

File tree

2 files changed

+127
-0
lines changed

2 files changed

+127
-0
lines changed

src/intrinsics/llvm.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,14 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
5454
);
5555
}
5656

57+
"llvm.fptosi.sat.v4i32.v4f32" => {
58+
intrinsic_args!(fx, args => (a); intrinsic);
59+
60+
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
61+
fx.bcx.ins().fcvt_to_sint_sat(types::I32, lane)
62+
});
63+
}
64+
5765
_ => {
5866
fx.tcx
5967
.dcx()

src/intrinsics/llvm_aarch64.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
//! Emulate AArch64 LLVM intrinsics
22
3+
use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
4+
use rustc_target::asm::*;
5+
6+
use crate::inline_asm::{CInlineAsmOperand, codegen_inline_asm_inner};
37
use crate::intrinsics::*;
48
use crate::prelude::*;
59

@@ -49,6 +53,121 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
4953
});
5054
}
5155

56+
"llvm.aarch64.neon.fcvtns.v4i32.v4f32" => {
57+
intrinsic_args!(fx, args => (a); intrinsic);
58+
59+
// Note: Using inline asm instead of fcvt_to_sint, as the latter rounds toward zero, whereas FCVTNS rounds to nearest (ties to even).
60+
61+
let a_ptr = a.force_stack(fx).0.get_addr(fx);
62+
let res_place = CPlace::new_stack_slot(fx, ret.layout());
63+
let res_ptr = res_place.to_ptr().get_addr(fx);
64+
65+
codegen_inline_asm_inner(
66+
fx,
67+
&[InlineAsmTemplatePiece::String(
68+
"ldr q0, [x0]
69+
fcvtns v0.4s, v0.4s
70+
str q0, [x1]"
71+
.into(),
72+
)],
73+
&[
74+
CInlineAsmOperand::In {
75+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
76+
AArch64InlineAsmReg::x0,
77+
)),
78+
value: a_ptr,
79+
},
80+
CInlineAsmOperand::In {
81+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
82+
AArch64InlineAsmReg::x1,
83+
)),
84+
value: res_ptr,
85+
},
86+
],
87+
InlineAsmOptions::NOSTACK,
88+
);
89+
let res = res_place.to_cvalue(fx);
90+
ret.write_cvalue_transmute(fx, res);
91+
}
92+
93+
"llvm.aarch64.neon.frecpe.v4f32" => {
94+
intrinsic_args!(fx, args => (a); intrinsic);
95+
96+
let a_ptr = a.force_stack(fx).0.get_addr(fx);
97+
let res_place = CPlace::new_stack_slot(fx, ret.layout());
98+
let res_ptr = res_place.to_ptr().get_addr(fx);
99+
100+
codegen_inline_asm_inner(
101+
fx,
102+
&[InlineAsmTemplatePiece::String(
103+
"ldr q0, [x0]
104+
frecpe v0.4s, v0.4s
105+
str q0, [x1]"
106+
.into(),
107+
)],
108+
&[
109+
CInlineAsmOperand::In {
110+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
111+
AArch64InlineAsmReg::x0,
112+
)),
113+
value: a_ptr,
114+
},
115+
CInlineAsmOperand::In {
116+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
117+
AArch64InlineAsmReg::x1,
118+
)),
119+
value: res_ptr,
120+
},
121+
],
122+
InlineAsmOptions::NOSTACK,
123+
);
124+
let res = res_place.to_cvalue(fx);
125+
ret.write_cvalue_transmute(fx, res);
126+
}
127+
128+
"llvm.aarch64.neon.frecps.v4f32" => {
129+
intrinsic_args!(fx, args => (a, b); intrinsic);
130+
131+
let a_ptr = a.force_stack(fx).0.get_addr(fx);
132+
let b_ptr = b.force_stack(fx).0.get_addr(fx);
133+
let res_place = CPlace::new_stack_slot(fx, ret.layout());
134+
let res_ptr = res_place.to_ptr().get_addr(fx);
135+
136+
codegen_inline_asm_inner(
137+
fx,
138+
&[InlineAsmTemplatePiece::String(
139+
"ldr q0, [x0]
140+
ldr q1, [x1]
141+
frecps v0.4s, v0.4s, v1.4s
142+
str q0, [x2]"
143+
.into(),
144+
)],
145+
&[
146+
CInlineAsmOperand::In {
147+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
148+
AArch64InlineAsmReg::x0,
149+
)),
150+
value: a_ptr,
151+
},
152+
CInlineAsmOperand::In {
153+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
154+
AArch64InlineAsmReg::x1,
155+
)),
156+
value: b_ptr,
157+
},
158+
CInlineAsmOperand::In {
159+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
160+
AArch64InlineAsmReg::x2,
161+
)),
162+
value: res_ptr,
163+
},
164+
],
165+
InlineAsmOptions::NOSTACK,
166+
);
167+
let res = res_place.to_cvalue(fx);
168+
ret.write_cvalue_transmute(fx, res);
169+
}
170+
52171
_ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v")
53172
|| intrinsic.starts_with("llvm.aarch64.neon.uqadd.v") =>
54173
{

0 commit comments

Comments
 (0)