diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index 0defde52fd..8020fde590 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -1,7 +1,7 @@ //! AArch64 intrinsics. //! -//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The -//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful. +//! The reference for NEON is [Arm's NEON Intrinsics Reference][arm_ref]. The +//! [Arm's NEON Intrinsics Online Database][arm_dat] is also useful. //! //! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf //! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 03c56ff0cb..30512e11bb 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -195,6 +195,17 @@ pub unsafe fn vabdd_f64(a: f64, b: f64) -> f64 { pub unsafe fn vabds_f32(a: f32, b: f32) -> f32 { simd_extract!(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } +#[doc = "Floating-point absolute difference"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fabd))] +pub unsafe fn vabdh_f16(a: f16, b: f16) -> f16 { + simd_extract!(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} #[doc = "Signed Absolute difference Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"] #[doc = "## Safety"] @@ -1041,6 +1052,44 @@ pub unsafe fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x _vbcaxq_u64(a.as_signed(), b.as_signed(), 
c.as_signed()).as_unsigned() } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v4f16" + )] + fn _vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vcadd_rot270_f16(a, b) +} +#[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v8f16" + )] + fn _vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vcaddq_rot270_f16(a, b) +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1095,6 +1144,44 @@ pub unsafe fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { _vcaddq_rot270_f64(a, b) } #[doc = "Floating-point complex add"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v4f16" + )] + fn _vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vcadd_rot90_f16(a, b) +} +#[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcadd))] +pub unsafe fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v8f16" + )] + fn _vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vcaddq_rot90_f16(a, b) +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1220,6 +1307,24 @@ pub unsafe fn vcages_f32(a: f32, b: f32) -> u32 { } _vcages_f32(a, b).as_unsigned() } +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(facge))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcageh_f16(a: f16, b: f16) -> u16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.i32.f16" + )] + fn _vcageh_f16(a: f16, b: f16) -> i32; + } + _vcageh_f16(a, b).as_unsigned() as u16 +} #[doc = "Floating-point absolute compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f64)"] #[doc = "## Safety"] @@ -1292,6 +1397,24 @@ pub unsafe fn vcagts_f32(a: f32, b: f32) -> u32 { } _vcagts_f32(a, b).as_unsigned() } +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(facgt))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcagth_f16(a: f16, b: f16) -> u16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.i32.f16" + )] + fn _vcagth_f16(a: f16, b: f16) -> i32; + } + _vcagth_f16(a, b).as_unsigned() as u16 +} #[doc = "Floating-point absolute compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f64)"] #[doc = "## Safety"] @@ -1336,6 +1459,17 @@ pub unsafe fn vcaled_f64(a: f64, b: f64) -> u64 { pub unsafe fn vcales_f32(a: f32, b: f32) -> u32 { vcages_f32(b, a) } +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(facge))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcaleh_f16(a: f16, b: f16) -> u16 { + vcageh_f16(b, a) +} #[doc = "Floating-point absolute compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f64)"] #[doc = "## Safety"] @@ -1380,6 +1514,17 @@ pub unsafe fn vcaltd_f64(a: f64, b: f64) -> u64 { pub unsafe fn vcalts_f32(a: f32, b: f32) -> u32 { vcagts_f32(b, a) } +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(facgt))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcalth_f16(a: f16, b: f16) -> u16 { + vcagth_f16(b, a) +} #[doc = "Floating-point compare equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f64)"] #[doc = "## Safety"] @@ -1512,6 +1657,41 @@ pub unsafe fn vceqd_s64(a: i64, b: i64) -> u64 { pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 { transmute(vceq_u64(transmute(a), transmute(b))) } +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqh_f16(a: f16, b: f16) -> u16 { + simd_extract!(vceq_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} +#[doc = 
"Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmeq))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqz_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_eq(a, transmute(b)) +} +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmeq))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqzq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_eq(a, transmute(b)) +} #[doc = "Floating-point compare bitwise equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f32)"] #[doc = "## Safety"] @@ -1823,6 +2003,17 @@ pub unsafe fn vceqzd_u64(a: u64) -> u64 { transmute(vceqz_u64(transmute(a))) } #[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqzh_f16(a: f16) -> u16 { + simd_extract!(vceqz_f16(vdup_n_f16(a)), 0) +} +#[doc = "Floating-point compare bitwise equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzs_f32)"] #[doc = "## 
Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1954,6 +2145,17 @@ pub unsafe fn vcged_s64(a: i64, b: i64) -> u64 { pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 { transmute(vcge_u64(transmute(a), transmute(b))) } +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgeh_f16(a: f16, b: f16) -> u16 { + simd_extract!(vcge_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} #[doc = "Floating-point compare greater than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f32)"] #[doc = "## Safety"] @@ -2131,6 +2333,17 @@ pub unsafe fn vcgezs_f32(a: f32) -> u32 { pub unsafe fn vcgezd_s64(a: i64) -> u64 { transmute(vcgez_s64(transmute(a))) } +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgezh_f16(a: f16) -> u16 { + simd_extract!(vcgez_f16(vdup_n_f16(a)), 0) +} #[doc = "Floating-point compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f64)"] #[doc = "## Safety"] @@ -2241,6 +2454,17 @@ pub unsafe fn vcgtd_s64(a: i64, b: i64) -> u64 { pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 { transmute(vcgt_u64(transmute(a), transmute(b))) } +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgth_f16(a: f16, b: f16) -> u16 { + simd_extract!(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} #[doc = "Floating-point compare greater than zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f32)"] #[doc = "## Safety"] @@ -2418,6 +2642,17 @@ pub unsafe fn vcgtzs_f32(a: f32) -> u32 { pub unsafe fn vcgtzd_s64(a: i64) -> u64 { transmute(vcgtz_s64(transmute(a))) } +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgtzh_f16(a: f16) -> u16 { + simd_extract!(vcgtz_f16(vdup_n_f16(a)), 0) +} #[doc = "Floating-point compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f64)"] #[doc = "## Safety"] @@ -2528,6 +2763,17 @@ pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 { pub unsafe fn vcled_s64(a: i64, b: i64) -> u64 { transmute(vcle_s64(transmute(a), transmute(b))) } +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcleh_f16(a: f16, b: f16) -> 
u16 { + simd_extract!(vcle_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} #[doc = "Floating-point compare less than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f32)"] #[doc = "## Safety"] @@ -2705,6 +2951,17 @@ pub unsafe fn vclezs_f32(a: f32) -> u32 { pub unsafe fn vclezd_s64(a: i64) -> u64 { transmute(vclez_s64(transmute(a))) } +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclezh_f16(a: f16) -> u16 { + simd_extract!(vclez_f16(vdup_n_f16(a)), 0) +} #[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f64)"] #[doc = "## Safety"] @@ -2794,6 +3051,17 @@ pub unsafe fn vcltd_s64(a: i64, b: i64) -> u64 { transmute(vclt_s64(transmute(a), transmute(b))) } #[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclth_f16(a: f16, b: f16) -> u16 { + simd_extract!(vclt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) +} +#[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclts_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2992,6 +3260,55 @@ pub unsafe fn vcltzs_f32(a: f32) -> u32 { pub unsafe fn vcltzd_s64(a: i64) -> u64 { 
transmute(vcltz_s64(transmute(a))) } +#[doc = "Floating-point compare less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcmp))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcltzh_f16(a: f16) -> u16 { + simd_extract!(vcltz_f16(vdup_n_f16(a)), 0) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v4f16" + )] + fn _vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vcmla_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16" + )] + fn _vcmlaq_f16(a: float16x8_t, b: 
float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vcmlaq_f16(a, b, c) +} #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"] #[doc = "## Safety"] @@ -3047,6 +3364,66 @@ pub unsafe fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa _vcmlaq_f64(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as 
u32 + 1 + ] + ); + vcmlaq_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3092,39 +3469,99 @@ pub unsafe fn vcmlaq_lane_f32( vcmlaq_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,fcma")] #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, -) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_f32(a, b, c) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,fcma")] #[cfg_attr(test, assert_instr(fcmla, LANE = 
0))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, -) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x4_t = simd_shuffle!( +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +pub unsafe fn vcmla_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); + vcmla_f32(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +pub unsafe fn vcmlaq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> 
float32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float32x4_t = simd_shuffle!( c, c, [ @@ -3137,6 +3574,44 @@ pub unsafe fn vcmlaq_laneq_f32( vcmlaq_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v4f16" + )] + fn _vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vcmla_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v8f16" + )] + fn _vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vcmlaq_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"] #[doc = "## Safety"] #[doc = " 
* Neon instrinsic unsafe"] @@ -3191,6 +3666,66 @@ pub unsafe fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) _vcmlaq_rot180_f64(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot180_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot180_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3236,6 +3771,66 @@ pub unsafe fn vcmlaq_rot180_lane_f32( vcmlaq_rot180_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot180_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot180_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot180_f16(a, b, c) +} +#[doc = "Floating-point complex multiply 
accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3281,6 +3876,44 @@ pub unsafe fn vcmlaq_rot180_laneq_f32( vcmlaq_rot180_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v4f16" + )] + fn _vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vcmla_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v8f16" + )] + fn _vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vcmlaq_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3335,6 +3968,66 @@ pub unsafe fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) _vcmlaq_rot270_f64(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot270_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot270_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot270_f16(a, b, c) +} +#[doc = 
"Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3380,6 +4073,66 @@ pub unsafe fn vcmlaq_rot270_lane_f32( vcmlaq_rot270_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot270_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot270_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + 
vcmlaq_rot270_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3425,18 +4178,56 @@ pub unsafe fn vcmlaq_rot270_laneq_f32( vcmlaq_rot270_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,fcma")] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg_attr(test, assert_instr(fcmla))] -pub unsafe fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { +pub unsafe fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32" + link_name = "llvm.aarch64.neon.vcmla.rot90.v4f16" + )] + fn _vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vcmla_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> 
float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v8f16" + )] + fn _vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vcmlaq_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32" )] fn _vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; } @@ -3479,6 +4270,66 @@ pub unsafe fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) - _vcmlaq_rot90_f64(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot90_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply 
accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot90_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 1); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -3524,6 +4375,66 @@ pub unsafe fn vcmlaq_rot90_lane_f32( vcmlaq_rot90_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmla_rot90_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmla_rot90_f16(a, b, c) +} +#[doc = "Floating-point 
complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcmlaq_rot90_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: float16x8_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot90_f16(a, b, c) +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -6721,7 +7632,7 @@ pub unsafe fn vcopyq_laneq_p64( pub unsafe fn vcreate_f64(a: u64) -> float64x1_t { transmute(a) } -#[doc = "Floating-point convert to lower precision narrow"] +#[doc = "Floating-point convert"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -6787,6 +7698,28 @@ pub unsafe fn vcvt_f64_u64(a: uint64x1_t) -> float64x1_t { pub unsafe fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t { simd_cast(a) } +#[doc = "Floating-point convert to lower precision"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtn2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t { + vcombine_f16(a, vcvt_f16_f32(b)) +} +#[doc = "Floating-point convert to higher precision"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtl2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t { + vcvt_f32_f16(vget_high_f16(a)) +} #[doc = "Floating-point convert to lower precision narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64)"] #[doc = "## Safety"] @@ -7043,6 +7976,42 @@ pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t { _vcvtq_u64_f64(a).as_unsigned() } #[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvta_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.v4i16.v4f16" + )] + fn _vcvta_s16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvta_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, 
assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtaq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.v8i16.v8f16" + )] + fn _vcvtaq_s16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtaq_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7115,6 +8084,42 @@ pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t { _vcvtaq_s64_f64(a) } #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvta_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.v4i16.v4f16" + )] + fn _vcvta_u16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvta_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtaq_u16_f16(a: 
float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.v8i16.v8f16" + )] + fn _vcvtaq_u16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtaq_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7187,6 +8192,100 @@ pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { _vcvtaq_u64_f64(a).as_unsigned() } #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_s16_f16(a: f16) -> i16 { + vcvtah_s32_f16(a) as i16 +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_s32_f16(a: f16) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.i32.f16" + )] + fn _vcvtah_s32_f16(a: f16) -> i32; + } + _vcvtah_s32_f16(a) +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtas))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_s64_f16(a: f16) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.i64.f16" + )] + fn _vcvtah_s64_f16(a: f16) -> i64; + } + _vcvtah_s64_f16(a) +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_u16_f16(a: f16) -> u16 { + vcvtah_u32_f16(a) as u16 +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_u32_f16(a: f16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.i32.f16" + )] + fn _vcvtah_u32_f16(a: f16) -> i32; + } + _vcvtah_u32_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u64_f16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtau))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtah_u64_f16(a: f16) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.i64.f16" + )] + fn _vcvtah_u64_f16(a: f16) -> i64; + } + _vcvtah_u64_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_s32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7280,51 +8379,431 @@ pub unsafe fn vcvtd_f64_s64(a: i64) -> f64 { pub unsafe fn vcvts_f32_s32(a: i32) -> f32 { a as f32 } -#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s32_f32)"] +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_s16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtms))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32" - )] - fn _vcvtm_s32_f32(a: float32x2_t) -> int32x2_t; - } - _vcvtm_s32_f32(a) +#[cfg_attr(test, assert_instr(scvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_s16(a: i16) -> f16 { + a as f16 } -#[doc = "Floating-point convert to signed integer, rounding 
toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s32_f32)"] +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_s32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtms))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32" - )] - fn _vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t; - } - _vcvtmq_s32_f32(a) +#[cfg_attr(test, assert_instr(scvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_s32(a: i32) -> f16 { + a as f16 } -#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s64_f64)"] +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_s64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtms))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t { +#[cfg_attr(test, assert_instr(scvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_s64(a: i64) -> f16 { + a as f16 +} +#[doc = "Unsigned fixed-point convert to floating-point"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_u16(a: u16) -> f16 { + a as f16 +} +#[doc = "Unsigned fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_u32(a: u32) -> f16 { + a as f16 +} +#[doc = "Unsigned fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_f16_u64(a: u64) -> f16 { + a as f16 +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_s16(a: i16) -> f16 { + static_assert!(N >= 1 && N <= 16); + vcvth_n_f16_s32::(a as i32) as f16 +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_s32)"] +#[doc = 
"## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_s32(a: i32) -> f16 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.f16.i32" + )] + fn _vcvth_n_f16_s32(a: i32, n: i32) -> f16; + } + _vcvth_n_f16_s32(a, N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_s64(a: i64) -> f16 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.f16.i64" + )] + fn _vcvth_n_f16_s64(a: i64, n: i32) -> f16; + } + _vcvth_n_f16_s64(a, N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_u16(a: u16) -> f16 { + static_assert!(N >= 1 && N <= 16); + vcvth_n_f16_u32::(a as u32) as f16 +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_u32(a: u32) -> f16 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.f16.i32" + )] + fn _vcvth_n_f16_u32(a: i32, n: i32) -> f16; + } + _vcvth_n_f16_u32(a.as_signed(), N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_f16_u64(a: u64) -> f16 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.f16.i64" + )] + fn _vcvth_n_f16_u64(a: i64, n: i32) -> f16; + } + _vcvth_n_f16_u64(a.as_signed(), N) +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_s16_f16(a: f16) -> i16 { + static_assert!(N >= 1 && N <= 16); + 
vcvth_n_s32_f16::(a) as i16 +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_s32_f16(a: f16) -> i32 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.i32.f16" + )] + fn _vcvth_n_s32_f16(a: f16, n: i32) -> i32; + } + _vcvth_n_s32_f16(a, N) +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_s64_f16(a: f16) -> i64 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.i64.f16" + )] + fn _vcvth_n_s64_f16(a: f16, n: i32) -> i64; + } + _vcvth_n_s64_f16(a, N) +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", 
issue = "136306")] +pub unsafe fn vcvth_n_u16_f16(a: f16) -> u16 { + static_assert!(N >= 1 && N <= 16); + vcvth_n_u32_f16::(a) as u16 +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_u32_f16(a: f16) -> u32 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.i32.f16" + )] + fn _vcvth_n_u32_f16(a: f16, n: i32) -> i32; + } + _vcvth_n_u32_f16(a, N).as_unsigned() +} +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_n_u64_f16(a: f16) -> u64 { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.i64.f16" + )] + fn _vcvth_n_u64_f16(a: f16, n: i32) -> i64; + } + _vcvth_n_u64_f16(a, N).as_unsigned() +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs))] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_s16_f16(a: f16) -> i16 { + a as i16 +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_s32_f16(a: f16) -> i32 { + a as i32 +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzs))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_s64_f16(a: f16) -> i64 { + a as i64 +} +#[doc = "Floating-point convert to unsigned fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_u16_f16(a: f16) -> u16 { + a as u16 +} +#[doc = "Floating-point convert to unsigned fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_u32_f16(a: f16) -> u32 { + a as u32 +} +#[doc = "Floating-point convert to 
unsigned fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtzu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvth_u64_f16(a: f16) -> u64 { + a as u64 +} +#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtm_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.v4i16.v4f16" + )] + fn _vcvtm_s16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtm_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.v8i16.v8f16" + )] + fn _vcvtmq_s16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtmq_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtms))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32" + )] + fn _vcvtm_s32_f32(a: float32x2_t) -> int32x2_t; + } + _vcvtm_s32_f32(a) +} +#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtms))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32" + )] + fn _vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t; + } + _vcvtmq_s32_f32(a) +} +#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtms))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), @@ -7353,6 +8832,42 @@ pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t { _vcvtmq_s64_f64(a) } #[doc = 
"Floating-point convert to unsigned integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtm_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.v4i16.v4f16" + )] + fn _vcvtm_u16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtm_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.v8i16.v8f16" + )] + fn _vcvtmq_u16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtmq_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7418,11 +8933,105 @@ pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64" + link_name = 
"llvm.aarch64.neon.fcvtmu.v2i64.v2f64" + )] + fn _vcvtmq_u64_f64(a: float64x2_t) -> int64x2_t; + } + _vcvtmq_u64_f64(a).as_unsigned() +} +#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_s16_f16(a: f16) -> i16 { + vcvtmh_s32_f16(a) as i16 +} +#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_s32_f16(a: f16) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.i32.f16" + )] + fn _vcvtmh_s32_f16(a: f16) -> i32; + } + _vcvtmh_s32_f16(a) +} +#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtms))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_s64_f16(a: f16) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.i64.f16" + )] + fn _vcvtmh_s64_f16(a: f16) -> i64; + } + _vcvtmh_s64_f16(a) +} +#[doc = 
"Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_u16_f16(a: f16) -> u16 { + vcvtmh_u32_f16(a) as u16 +} +#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_u32_f16(a: f16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.i32.f16" + )] + fn _vcvtmh_u32_f16(a: f16) -> i32; + } + _vcvtmh_u32_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtmu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtmh_u64_f16(a: f16) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.i64.f16" )] - fn _vcvtmq_u64_f64(a: float64x2_t) -> int64x2_t; + fn _vcvtmh_u64_f16(a: f16) -> i64; } - _vcvtmq_u64_f64(a).as_unsigned() + _vcvtmh_u64_f16(a).as_unsigned() } #[doc = "Floating-point convert to 
signed integer, rounding toward minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_s32_f32)"] @@ -7497,6 +9106,42 @@ pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 { _vcvtmd_u64_f64(a).as_unsigned() } #[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtn_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.v4i16.v4f16" + )] + fn _vcvtn_s16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtn_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.v8i16.v8f16" + )] + fn _vcvtnq_s16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtnq_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7569,6 +9214,42 
@@ pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t { _vcvtnq_s64_f64(a) } #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtn_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.v4i16.v4f16" + )] + fn _vcvtn_u16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtn_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.v8i16.v8f16" + )] + fn _vcvtnq_u16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtnq_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7640,6 +9321,100 @@ pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { } _vcvtnq_u64_f64(a).as_unsigned() } +#[doc = "Floating-point convert to integer, 
rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_s16_f16(a: f16) -> i16 { + vcvtnh_s32_f16(a) as i16 +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_s32_f16(a: f16) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i32.f16" + )] + fn _vcvtnh_s32_f16(a: f16) -> i32; + } + _vcvtnh_s32_f16(a) +} +#[doc = "Floating-point convert to integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtns))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_s64_f16(a: f16) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i64.f16" + )] + fn _vcvtnh_s64_f16(a: f16) -> i64; + } + _vcvtnh_s64_f16(a) +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_u16_f16(a: f16) -> u16 { + vcvtnh_u32_f16(a) as u16 +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_u32_f16(a: f16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i32.f16" + )] + fn _vcvtnh_u32_f16(a: f16) -> i32; + } + _vcvtnh_u32_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtnu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtnh_u64_f16(a: f16) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i64.f16" + )] + fn _vcvtnh_u64_f16(a: f16) -> i64; + } + _vcvtnh_u64_f16(a).as_unsigned() +} #[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_s32_f32)"] #[doc = "## Safety"] @@ -7712,6 +9487,42 @@ pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 { } _vcvtnd_u64_f64(a).as_unsigned() } +#[doc = "Floating-point convert to signed integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtp_s16_f16(a: float16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.v4i16.v4f16" + )] + fn _vcvtp_s16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtp_s16_f16(a) +} +#[doc = "Floating-point convert to signed integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtpq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.v8i16.v8f16" + )] + fn _vcvtpq_s16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtpq_s16_f16(a) +} #[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s32_f32)"] #[doc = "## Safety"] @@ -7784,6 +9595,42 @@ pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t { } _vcvtpq_s64_f64(a) } +#[doc = "Floating-point 
convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtp_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.v4i16.v4f16" + )] + fn _vcvtp_u16_f16(a: float16x4_t) -> int16x4_t; + } + _vcvtp_u16_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtpq_u16_f16(a: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.v8i16.v8f16" + )] + fn _vcvtpq_u16_f16(a: float16x8_t) -> int16x8_t; + } + _vcvtpq_u16_f16(a).as_unsigned() +} #[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u32_f32)"] #[doc = "## Safety"] @@ -7856,6 +9703,100 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { } _vcvtpq_u64_f64(a).as_unsigned() } +#[doc = "Floating-point convert to integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * 
Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_s16_f16(a: f16) -> i16 { + vcvtph_s32_f16(a) as i16 +} +#[doc = "Floating-point convert to integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_s32_f16(a: f16) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i32.f16" + )] + fn _vcvtph_s32_f16(a: f16) -> i32; + } + _vcvtph_s32_f16(a) +} +#[doc = "Floating-point convert to integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_s64_f16(a: f16) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i64.f16" + )] + fn _vcvtph_s64_f16(a: f16) -> i64; + } + _vcvtph_s64_f16(a) +} +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", 
issue = "136306")] +pub unsafe fn vcvtph_u16_f16(a: f16) -> u16 { + vcvtph_u32_f16(a) as u16 +} +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_u32_f16(a: f16) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i32.f16" + )] + fn _vcvtph_u32_f16(a: f16) -> i32; + } + _vcvtph_u32_f16(a).as_unsigned() +} +#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fcvtpu))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtph_u64_f16(a: f16) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i64.f16" + )] + fn _vcvtph_u64_f16(a: f16) -> i64; + } + _vcvtph_u64_f16(a).as_unsigned() +} #[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_s32_f32)"] #[doc = "## Safety"] @@ -8195,6 +10136,28 @@ pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 { simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0) } #[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fdiv))] +pub unsafe fn vdiv_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_div(a, b) +} +#[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fdiv))] +pub unsafe fn vdivq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_div(a, b) +} +#[doc = "Divide"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8238,6 +10201,17 @@ pub unsafe fn vdiv_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { pub unsafe fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { simd_div(a, b) } +#[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vdivh_f16(a: f16, b: f16) -> f16 { + a / b +} #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"] #[doc = "## Safety"] @@ -8444,7 +10418,7 @@ pub unsafe fn vduph_laneq_p16(a: poly16x8_t) -> p16 { static_assert_uimm_bits!(N, 3); simd_extract!(a, N as u32) } -#[doc = "Set all vector lanes to the same value"] +#[doc = "Extract an element from a vector"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8457,7 +10431,7 @@ pub unsafe fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 { static_assert_uimm_bits!(N, 4); simd_extract!(a, N as u32) } -#[doc = "Set all vector lanes to the same value"] +#[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8470,7 +10444,7 @@ pub unsafe fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 { static_assert_uimm_bits!(N, 4); simd_extract!(a, N as u32) } -#[doc = "Set all vector lanes to the same value"] +#[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8523,6 +10497,32 @@ pub unsafe fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 { simd_extract!(a, N as u32) } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vduph_lane_f16<const N: i32>(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(N, 2); + simd_extract!(a, N as u32) +} +#[doc = "Extract an element from a vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(nop, N = 4))] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vduph_laneq_f16(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(N, 4); + simd_extract!(a, N as u32) +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8927,6 +10927,74 @@ pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float6 _vfma_f64(b, c, a) } #[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfma_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfma_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 3); + vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f16)"] +#[doc = "## Safety"] +#[doc 
= " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmaq_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmaq_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -9045,6 +11113,28 @@ pub unsafe fn vfma_laneq_f64( static_assert_uimm_bits!(LANE, 1); vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } +#[doc = "Floating-point fused Multiply-Subtract from accumulator."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmla))] +pub unsafe fn vfma_n_f16(a: float16x4_t, b: float16x4_t, c: f16) -> float16x4_t { + vfma_f16(a, b, vdup_n_f16(c)) +} +#[doc = 
"Floating-point fused Multiply-Subtract from accumulator."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmla))] +pub unsafe fn vfmaq_n_f16(a: float16x8_t, b: float16x8_t, c: f16) -> float16x8_t { + vfmaq_f16(a, b, vdupq_n_f16(c)) +} #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f64)"] #[doc = "## Safety"] @@ -9077,6 +11167,52 @@ pub unsafe fn vfmad_lane_f64(a: f64, b: f64, c: float64x1_t) -> let c: f64 = simd_extract!(c, LANE as u32); _vfmad_lane_f64(b, c, a) } +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmadd))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmah_f16(a: f16, b: f16, c: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fma.f16" + )] + fn _vfmah_f16(a: f16, b: f16, c: f16) -> f16; + } + _vfmah_f16(b, c, a) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmah_lane_f16(a: 
f16, b: f16, v: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + let c: f16 = simd_extract!(v, LANE as u32); + vfmah_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmah_laneq_f16(a: f16, b: f16, v: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + let c: f16 = simd_extract!(v, LANE as u32); + vfmah_f16(a, b, c) +} #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64)"] #[doc = "## Safety"] @@ -9186,17 +11322,525 @@ pub unsafe fn vfmad_laneq_f64(a: f64, b: f64, c: float64x2_t) - let c: f64 = simd_extract!(c, LANE as u32); _vfmad_laneq_f64(b, c, a) } +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlal2))] +pub unsafe fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal2.v2f32.v4f16" + )] + fn _vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + _vfmlal_high_f16(r, a, b) +} +#[doc = 
"Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlal2))] +pub unsafe fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal2.v4f32.v8f16" + )] + fn _vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + _vfmlalq_high_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlal_lane_high_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch 
= "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlal_laneq_high_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x8_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 3); + vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlalq_lane_high_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlalq_laneq_high_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x8_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 3); + vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlal_lane_low_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlal_laneq_low_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x8_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 3); + vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlalq_lane_low_f16( + r: float32x4_t, + a: 
float16x8_t, + b: float16x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlal, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlalq_laneq_low_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x8_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 3); + vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlal))] +pub unsafe fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal.v2f32.v4f16" + )] + fn _vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + _vfmlal_low_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlal))] +pub unsafe fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal.v4f32.v8f16" + )] + fn _vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + _vfmlalq_low_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlsl2))] +pub unsafe fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl2.v2f32.v4f16" + )] + fn _vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + _vfmlsl_high_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlsl2))] +pub unsafe fn 
vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl2.v4f32.v8f16" + )] + fn _vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + _vfmlslq_high_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlsl_lane_high_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlsl_laneq_high_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x8_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 3); + vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlslq_lane_high_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlslq_laneq_high_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x8_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 3); + vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlsl_lane_low_f16( 
+ r: float32x2_t, + a: float16x4_t, + b: float16x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlsl_laneq_low_f16( + r: float32x2_t, + a: float16x4_t, + b: float16x8_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 3); + vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmlsl, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlslq_lane_low_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, 
assert_instr(fmlsl, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmlslq_laneq_low_f16( + r: float32x4_t, + a: float16x8_t, + b: float16x8_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 3); + vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlsl))] +pub unsafe fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl.v2f32.v4f16" + )] + fn _vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + _vfmlsl_low_f16(r, a, b) +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmlsl))] +pub unsafe fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl.v4f32.v8f16" + )] + fn _vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + _vfmlslq_low_f16(r, a, b) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmsub))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { + let b: float64x1_t = simd_neg(b); + vfma_f64(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfms_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfms_laneq_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x8_t, +) -> float16x4_t { + static_assert_uimm_bits!(LANE, 3); + vfms_f16(a, b, vdup_n_f16(simd_extract!(c, 
LANE as u32))) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsq_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x4_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) +} #[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { - let b: float64x1_t = simd_neg(b); - vfma_f64(a, b, c) +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsq_laneq_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, +) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) } #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)"] @@ -9317,6 +11961,28 @@ pub unsafe fn vfms_laneq_f64( static_assert_uimm_bits!(LANE, 1); vfms_f64(a, b, 
vdup_n_f64(simd_extract!(c, LANE as u32))) } +#[doc = "Floating-point fused Multiply-Subtract from accumulator."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmls))] +pub unsafe fn vfms_n_f16(a: float16x4_t, b: float16x4_t, c: f16) -> float16x4_t { + vfms_f16(a, b, vdup_n_f16(c)) +} +#[doc = "Floating-point fused Multiply-Subtract from accumulator."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmls))] +pub unsafe fn vfmsq_n_f16(a: float16x8_t, b: float16x8_t, c: f16) -> float16x8_t { + vfmsq_f16(a, b, vdupq_n_f16(c)) +} #[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f64)"] #[doc = "## Safety"] @@ -9329,6 +11995,45 @@ pub unsafe fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t vfms_f64(a, b, vdup_n_f64(c)) } #[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmsub))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsh_f16(a: f16, b: f16, c: f16) -> f16 { + vfmah_f16(a, -b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsh_lane_f16(a: f16, b: f16, v: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + let c: f16 = simd_extract!(v, LANE as u32); + vfmsh_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsh_laneq_f16(a: f16, b: f16, v: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + let c: f16 = simd_extract!(v, LANE as u32); + vfmsh_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -9417,6 +12122,28 @@ pub unsafe fn vfmsd_laneq_f64(a: f64, b: f64, c: float64x2_t) - vfmad_laneq_f64::(a, -b, c) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(ldr))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + 
crate::ptr::read_unaligned(ptr.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(ldr))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + crate::ptr::read_unaligned(ptr.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -11106,6 +13833,24 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } _vmaxq_f64(a, b) } +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmax))] +pub unsafe fn vmaxh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.f16" + )] + fn _vmaxh_f16(a: f16, b: f16) -> f16; + } + _vmaxh_f16(a, b) +} #[doc = "Floating-point Maximum Number (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f64)"] #[doc = "## Safety"] @@ -11142,6 +13887,60 @@ pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } _vmaxnmq_f64(a, b) } +#[doc = "Floating-point Maximum Number"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnm))] +pub unsafe fn vmaxnmh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.f16" + )] + fn _vmaxnmh_f16(a: f16, b: f16) -> f16; + } + _vmaxnmh_f16(a, b) +} +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnmv))] +pub unsafe fn vmaxnmv_f16(a: float16x4_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f16.v4f16" + )] + fn _vmaxnmv_f16(a: float16x4_t) -> f16; + } + _vmaxnmv_f16(a) +} +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnmv))] +pub unsafe fn vmaxnmvq_f16(a: float16x8_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f16.v8f16" + )] + fn _vmaxnmvq_f16(a: float16x8_t) -> f16; + } + _vmaxnmvq_f16(a) +} #[doc = "Floating-point maximum number across vector"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32)"] #[doc = "## Safety"] @@ -11196,6 +13995,42 @@ pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 { } _vmaxnmvq_f32(a) } +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxv))] +pub unsafe fn vmaxv_f16(a: float16x4_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxv.f16.v4f16" + )] + fn _vmaxv_f16(a: float16x4_t) -> f16; + } + _vmaxv_f16(a) +} +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxv))] +pub unsafe fn vmaxvq_f16(a: float16x8_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxv.f16.v8f16" + )] + fn _vmaxvq_f16(a: float16x8_t) -> f16; + } + _vmaxvq_f16(a) +} #[doc = "Horizontal vector max."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_f32)"] #[doc = "## Safety"] @@ -11502,6 +14337,24 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } _vminq_f64(a, b) } +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmin))] +pub unsafe fn vminh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.f16" + )] + fn _vminh_f16(a: f16, b: f16) -> f16; + } + _vminh_f16(a, b) +} #[doc = "Floating-point Minimum Number (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f64)"] #[doc = "## Safety"] @@ -11538,6 +14391,60 @@ pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } _vminnmq_f64(a, b) } +#[doc = "Floating-point Minimum Number"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnm))] +pub unsafe fn vminnmh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.f16" + )] + fn _vminnmh_f16(a: f16, b: f16) -> f16; + } + _vminnmh_f16(a, b) +} +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnmv))] +pub unsafe fn vminnmv_f16(a: float16x4_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f16.v4f16" + )] + fn 
_vminnmv_f16(a: float16x4_t) -> f16; + } + _vminnmv_f16(a) +} +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnmv))] +pub unsafe fn vminnmvq_f16(a: float16x8_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f16.v8f16" + )] + fn _vminnmvq_f16(a: float16x8_t) -> f16; + } + _vminnmvq_f16(a) +} #[doc = "Floating-point minimum number across vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f32)"] #[doc = "## Safety"] @@ -11592,6 +14499,42 @@ pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 { } _vminnmvq_f32(a) } +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminv))] +pub unsafe fn vminv_f16(a: float16x4_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f16.v4f16" + )] + fn _vminv_f16(a: float16x4_t) -> f16; + } + _vminv_f16(a) +} +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue 
= "136306")] +#[cfg_attr(test, assert_instr(fminv))] +pub unsafe fn vminvq_f16(a: float16x8_t) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f16.v8f16" + )] + fn _vminvq_f16(a: float16x8_t) -> f16; + } + _vminvq_f16(a) +} #[doc = "Horizontal vector min."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_f32)"] #[doc = "## Safety"] @@ -12770,6 +15713,51 @@ pub unsafe fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) } #[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmul_laneq_f16(a: float16x4_t, b: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 3); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} +#[doc = 
"Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -12812,10 +15800,49 @@ pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmul, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64 { - static_assert!(LANE == 0); - let b: f64 = simd_extract!(b, LANE as u32); +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64 { + static_assert!(LANE == 0); + let b: f64 = simd_extract!(b, LANE as u32); + a * b +} +#[doc = "Add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vmulh_f16(a: f16, b: f16) -> f16 { + a * b +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulh_lane_f16(a: f16, b: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + let b: f16 = simd_extract!(b, LANE as u32); + a * b +} +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulh_laneq_f16(a: f16, b: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + let b: f16 = simd_extract!(b, LANE as u32); a * b } #[doc = "Multiply long"] @@ -13231,6 +16258,42 @@ pub unsafe fn vmuld_laneq_f64(a: f64, b: float64x2_t) -> f64 { a * b } #[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulx_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.v4f16" + )] + fn _vmulx_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vmulx_f16(a, b) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.v8f16" + )] + fn _vmulxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vmulxq_f16(a, b) +} +#[doc = "Floating-point multiply extended"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -13303,6 +16366,96 @@ pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { _vmulxq_f64(a, b) } #[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulx_lane_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmulx_f16( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulx_laneq_f16(a: float16x4_t, b: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmulx_f16( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn 
vmulxq_lane_f16(a: float16x8_t, b: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + vmulxq_f16( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulxq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmulxq_f16( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} +#[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -13399,6 +16552,28 @@ pub unsafe fn vmulx_laneq_f64(a: float64x1_t, b: float64x2_t) - static_assert_uimm_bits!(LANE, 1); vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) } +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulx_n_f16(a: float16x4_t, b: f16) -> float16x4_t { + vmulx_f16(a, vdup_n_f16(b)) +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulxq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { + vmulxq_f16(a, vdupq_n_f16(b)) +} #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_f64)"] #[doc = "## Safety"] @@ -13488,6 +16663,50 @@ pub unsafe fn vmulxs_laneq_f32(a: f32, b: float32x4_t) -> f32 { vmulxs_f32(a, simd_extract!(b, LANE as u32)) } #[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulxh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.f16" + )] + fn _vmulxh_f16(a: f16, b: f16) -> f16; + } + _vmulxh_f16(a, b) +} +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulxh_lane_f16(a: f16, b: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + vmulxh_f16(a, simd_extract!(b, LANE as u32)) +} +#[doc = "Floating-point multiply extended"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulxh_laneq_f16(a: f16, b: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + vmulxh_f16(a, simd_extract!(b, LANE as u32)) +} +#[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -13555,6 +16774,17 @@ pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t { pub unsafe fn vnegd_s64(a: i64) -> i64 { a.wrapping_neg() } +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fneg))] +pub unsafe fn vnegh_f16(a: f16) -> f16 { + -a +} #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_f64)"] #[doc = "## Safety"] @@ -13618,6 +16848,24 @@ pub unsafe fn vpaddd_u64(a: uint64x2_t) -> u64 { vaddvq_u64(a) } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(faddp))] +pub unsafe fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v8f16" + )] + fn _vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vpaddq_f16(a, b) +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -13837,6 +17085,78 @@ pub unsafe fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { let ret_val: uint64x2_t = transmute(vpaddq_s64(transmute(a), transmute(b))); simd_shuffle!(ret_val, ret_val, [1, 0]) } +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v4f16" + )] + fn _vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vpmax_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v8f16" + )] + fn _vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + 
} + _vpmaxq_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub unsafe fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v4f16" + )] + fn _vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vpmaxnm_f16(a, b) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub unsafe fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v8f16" + )] + fn _vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vpmaxnmq_f16(a, b) +} #[doc = "Floating-point Maximum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"] #[doc = "## Safety"] @@ -14065,47 +17385,119 @@ pub unsafe fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.umaxp.v4i32" + link_name = "llvm.aarch64.neon.umaxp.v4i32" + )] + fn _vpmaxq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vpmaxq_u32(a.as_signed(), 
b.as_signed()).as_unsigned() +} +#[doc = "Floating-point maximum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vpmaxqd_f64(a: float64x2_t) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64" + )] + fn _vpmaxqd_f64(a: float64x2_t) -> f64; + } + _vpmaxqd_f64(a) +} +#[doc = "Floating-point maximum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32" + )] + fn _vpmaxs_f32(a: float32x2_t) -> f32; + } + _vpmaxs_f32(a) +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminp))] +pub unsafe fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v4f16" + )] + fn _vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vpmin_f16(a, b) +} +#[doc = 
"Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminp))] +pub unsafe fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v8f16" )] - fn _vpmaxq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + fn _vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; } - _vpmaxq_u32(a.as_signed(), b.as_signed()).as_unsigned() + _vpminq_f16(a, b) } -#[doc = "Floating-point maximum pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(fmaxp))] -pub unsafe fn vpmaxqd_f64(a: float64x2_t) -> f64 { +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnmp))] +pub unsafe fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64" + link_name = "llvm.aarch64.neon.fminnmp.v4f16" )] - fn _vpmaxqd_f64(a: float64x2_t) -> f64; + fn _vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; } - _vpmaxqd_f64(a) + _vpminnm_f16(a, b) } -#[doc = "Floating-point maximum pairwise"] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxs_f32)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(fmaxp))] -pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 { +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fminnmp))] +pub unsafe fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32" + link_name = "llvm.aarch64.neon.fminnmp.v8f16" )] - fn _vpmaxs_f32(a: float32x2_t) -> f32; + fn _vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; } - _vpmaxs_f32(a) + _vpminnmq_f16(a, b) } #[doc = "Floating-point Minimum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"] @@ -19520,6 +22912,24 @@ pub unsafe fn vrecpes_f32(a: f32) -> f32 { } _vrecpes_f32(a) } +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(frecpe))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpeh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.f16" + )] + fn _vrecpeh_f16(a: f16) -> f16; + } + _vrecpeh_f16(a) +} #[doc = "Floating-point reciprocal step"] 
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f64)"] #[doc = "## Safety"] @@ -19592,6 +23002,24 @@ pub unsafe fn vrecpss_f32(a: f32, b: f32) -> f32 { } _vrecpss_f32(a, b) } +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(frecps))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpsh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.f16" + )] + fn _vrecpsh_f16(a: f16, b: f16) -> f16; + } + _vrecpsh_f16(a, b) +} #[doc = "Floating-point reciprocal exponent"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxd_f64)"] #[doc = "## Safety"] @@ -19628,6 +23056,126 @@ pub unsafe fn vrecpxs_f32(a: f32) -> f32 { } _vrecpxs_f32(a) } +#[doc = "Floating-point reciprocal exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(frecpx))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpxh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpx.f16" + )] + fn _vrecpxh_f16(a: f16) -> f16; + } + _vrecpxh_f16(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} 
+#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub 
unsafe fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] #[doc = "## Safety"] @@ -21369,6 +24917,42 @@ pub unsafe fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { transmute(_vrnd64z_f64(simd_extract!(a, 0))) } #[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintz))] +pub unsafe fn vrnd_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.v4f16" + )] + fn _vrnd_f16(a: float16x4_t) -> float16x4_t; + } + _vrnd_f16(a) +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintz))] +pub unsafe fn vrndq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.v8f16" + )] + fn _vrndq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndq_f16(a) +} +#[doc = "Floating-point round to integral, toward zero"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21441,6 +25025,42 @@ pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t { _vrndq_f64(a) } #[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrnda_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.v4f16" + )] + fn _vrnda_f16(a: float16x4_t) -> float16x4_t; + } + _vrnda_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrndaq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.v8f16" + )] + fn _vrndaq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndaq_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21512,6 +25132,78 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t { } _vrndaq_f64(a) } +#[doc = "Floating-point round to integral, to nearest 
with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndah_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrndah_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.f16" + )] + fn _vrndah_f16(a: f16) -> f16; + } + _vrndah_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintz))] +pub unsafe fn vrndh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.f16" + )] + fn _vrndh_f16(a: f16) -> f16; + } + _vrndh_f16(a) +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndi_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v4f16" + )] + fn _vrndi_f16(a: float16x4_t) -> float16x4_t; + } + _vrndi_f16(a) +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndiq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v8f16" + )] + fn _vrndiq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndiq_f16(a) +} #[doc = "Floating-point round to integral, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"] #[doc = "## Safety"] @@ -21560,29 +25252,83 @@ pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v1f64" + link_name = "llvm.nearbyint.v1f64" + )] + fn _vrndi_f64(a: float64x1_t) -> float64x1_t; + } + _vrndi_f64(a) +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v2f64" + )] + fn _vrndiq_f64(a: float64x2_t) -> float64x2_t; + } + _vrndiq_f64(a) +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndih_f16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndih_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.f16" + )] + fn _vrndih_f16(a: f16) -> f16; + } + _vrndih_f16(a) +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintm))] +pub unsafe fn vrndm_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.floor.v4f16" )] - fn _vrndi_f64(a: float64x1_t) -> float64x1_t; + fn _vrndm_f16(a: float16x4_t) -> float16x4_t; } - _vrndi_f64(a) + _vrndm_f16(a) } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"] +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t { +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintm))] +pub unsafe fn vrndmq_f16(a: float16x8_t) -> float16x8_t { 
unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v2f64" + link_name = "llvm.floor.v8f16" )] - fn _vrndiq_f64(a: float64x2_t) -> float64x2_t; + fn _vrndmq_f16(a: float16x8_t) -> float16x8_t; } - _vrndiq_f64(a) + _vrndmq_f16(a) } #[doc = "Floating-point round to integral, toward minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"] @@ -21656,6 +25402,24 @@ pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t { } _vrndmq_f64(a) } +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintm))] +pub unsafe fn vrndmh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.floor.f16" + )] + fn _vrndmh_f16(a: f16) -> f16; + } + _vrndmh_f16(a) +} #[doc = "Floating-point round to integral, to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"] #[doc = "## Safety"] @@ -21692,6 +25456,24 @@ pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t { } _vrndnq_f64(a) } +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintn))] +pub unsafe fn vrndnh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { 
+ #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.f16" + )] + fn _vrndnh_f16(a: f16) -> f16; + } + _vrndnh_f16(a) +} #[doc = "Floating-point round to integral, to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"] #[doc = "## Safety"] @@ -21711,6 +25493,42 @@ pub unsafe fn vrndns_f32(a: f32) -> f32 { _vrndns_f32(a) } #[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndp_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.v4f16" + )] + fn _vrndp_f16(a: float16x4_t) -> float16x4_t; + } + _vrndp_f16(a) +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndpq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.v8f16" + )] + fn _vrndpq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndpq_f16(a) +} +#[doc = "Floating-point round to integral, toward plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"] #[doc = "## Safety"] #[doc = " * 
Neon instrinsic unsafe"] @@ -21782,6 +25600,60 @@ pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t { } _vrndpq_f64(a) } +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndph_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndph_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.f16" + )] + fn _vrndph_f16(a: f16) -> f16; + } + _vrndph_f16(a) +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintx))] +pub unsafe fn vrndx_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.rint.v4f16" + )] + fn _vrndx_f16(a: float16x4_t) -> float16x4_t; + } + _vrndx_f16(a) +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintx))] +pub unsafe fn vrndxq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.rint.v8f16" + )] + fn _vrndxq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndxq_f16(a) +} #[doc = "Floating-point round to integral exact, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"] #[doc = "## Safety"] @@ -21854,6 +25726,24 @@ pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t { } _vrndxq_f64(a) } +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(frintx))] +pub unsafe fn vrndxh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.rint.f16" + )] + fn _vrndxh_f16(a: f16) -> f16; + } + _vrndxh_f16(a) +} #[doc = "Signed rounding shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"] #[doc = "## Safety"] @@ -22074,6 +25964,25 @@ pub unsafe fn vrsqrtes_f32(a: f32) -> f32 { } _vrsqrtes_f32(a) } +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrteh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.f16" + )] + fn _vrsqrteh_f16(a: f16) -> f16; + } + 
_vrsqrteh_f16(a) +} #[doc = "Floating-point reciprocal square root step"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"] #[doc = "## Safety"] @@ -22146,6 +26055,24 @@ pub unsafe fn vrsqrtss_f32(a: f32, b: f32) -> f32 { } _vrsqrtss_f32(a, b) } +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrtsh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f16" + )] + fn _vrsqrtsh_f16(a: f16, b: f16) -> f16; + } + _vrsqrtsh_f16(a, b) +} #[doc = "Signed rounding shift right and accumulate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"] #[doc = "## Safety"] @@ -23293,6 +27220,28 @@ pub unsafe fn vsqadds_u32(a: u32, b: i32) -> u32 { _vsqadds_u32(a.as_signed(), b).as_unsigned() } #[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, assert_instr(fsqrt))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsqrt_f16(a: float16x4_t) -> float16x4_t { + simd_fsqrt(a) +} +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(test, 
assert_instr(fsqrt))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsqrtq_f16(a: float16x8_t) -> float16x8_t { + simd_fsqrt(a) +} +#[doc = "Calculates the square root of each lane."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -23336,6 +27285,24 @@ pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t { pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t { simd_fsqrt(a) } +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrth_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(fsqrt))] +pub unsafe fn vsqrth_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.sqrt.f16" + )] + fn _vsqrth_f16(a: f16) -> f16; + } + _vsqrth_f16(a) +} #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] #[doc = "## Safety"] @@ -23705,6 +27672,30 @@ pub unsafe fn vsrid_n_u64(a: u64, b: u64) -> u64 { transmute(vsri_n_u64::(transmute(a), transmute(b))) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn 
vst1_f16(ptr: *mut f16, a: float16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -25003,6 +28994,17 @@ pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 { pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 { a.wrapping_sub(b) } +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vsubh_f16(a: f16, b: f16) -> f16 { + a - b +} #[doc = "Signed Subtract Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"] #[doc = "## Safety"] @@ -25821,6 +29823,28 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } #[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] +pub unsafe fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [0, 4, 2, 6]) +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] +pub unsafe fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) +} +#[doc = "Transpose vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -26075,6 +30099,28 @@ pub unsafe fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } #[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] +pub unsafe fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [1, 5, 3, 7]) +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, 
not(target_env = "msvc")), assert_instr(trn2))] +pub unsafe fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) +} +#[doc = "Transpose vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -26669,6 +30715,28 @@ pub unsafe fn vusdotq_laneq_s32( vusdotq_s32(a, b, transmute(c)) } #[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] +pub unsafe fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [0, 2, 4, 6]) +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] +pub unsafe fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +} +#[doc = "Unzip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -26923,6 +30991,28 @@ pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } #[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] +pub unsafe fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [1, 3, 5, 7]) +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] +pub unsafe fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +} +#[doc = "Unzip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -27197,6 +31287,28 @@ pub unsafe fn vxarq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64 _vxarq_u64(a.as_signed(), b.as_signed(), IMM6 as i64).as_unsigned() } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [0, 4, 1, 5]) +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] 
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -27451,6 +31563,28 @@ pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { simd_shuffle!(a, b, [0, 2]) } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_shuffle!(a, b, [2, 6, 3, 7]) +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 868cb1937b..522388b627 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -594,6 +594,54 @@ 
pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2 simd_add(a, simd_cast(d)) } #[doc = "Absolute difference between the arguments of Floating"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fabd.v4f16" + )] + fn _vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vabd_f16(a, b) +} +#[doc = "Absolute difference between the arguments of Floating"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabdq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fabd.v8f16" + )] + fn 
_vabdq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vabdq_f16(a, b) +} +#[doc = "Absolute difference between the arguments of Floating"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1169,6 +1217,38 @@ pub unsafe fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { simd_cast(vabd_u32(a, b)) } #[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabs_f16(a: float16x4_t) -> float16x4_t { + simd_fabs(a) +} +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabsq_f16(a: float16x8_t) -> float16x8_t { + simd_fabs(a) +} +#[doc = "Floating-point absolute value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -1400,6 +1480,54 @@ pub unsafe fn vabsq_s32(a: int32x4_t) -> 
int32x4_t { } _vabsq_s32(a) } +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vabsh_f16(a: f16) -> f16 { + simd_extract!(vabs_f16(vdup_n_f16(a)), 0) +} +#[doc = "Floating-point Add (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_add(a, b) +} +#[doc = "Floating-point Add (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + 
simd_add(a, b) +} #[doc = "Bitwise exclusive OR"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p8)"] #[doc = "## Safety"] @@ -1538,6 +1666,22 @@ pub unsafe fn vadd_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { pub unsafe fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { simd_xor(a, b) } +#[doc = "Add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddh_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vadd.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fadd) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vaddh_f16(a: f16, b: f16) -> f16 { + a + b +} #[doc = "Bitwise exclusive OR"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p128)"] #[doc = "## Safety"] @@ -2038,6 +2182,54 @@ pub unsafe fn vandq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { simd_and(a, b) } #[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcage_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vacge.v4i16.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v4i16.v4f16" + )] + fn _vcage_f16(a: float16x4_t, b: float16x4_t) -> int16x4_t; + } + _vcage_f16(a, b).as_unsigned() +} +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcageq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v8i16.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v8i16.v8f16" + )] + fn _vcageq_f16(a: float16x8_t, b: float16x8_t) -> int16x8_t; + } + _vcageq_f16(a, b).as_unsigned() +} +#[doc = "Floating-point absolute compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2100,6 +2292,54 @@ pub unsafe fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { _vcageq_f32(a, b).as_unsigned() } #[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcagt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i16.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v4i16.v4f16" + )] + fn _vcagt_f16(a: float16x4_t, b: float16x4_t) -> int16x4_t; + } + _vcagt_f16(a, b).as_unsigned() +} +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcagtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v8i16.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v8i16.v8f16" + )] + fn _vcagtq_f16(a: float16x8_t, b: float16x8_t) -> int16x8_t; + } + _vcagtq_f16(a, b).as_unsigned() +} +#[doc = "Floating-point absolute compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2162,6 +2402,38 @@ pub unsafe fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { _vcagtq_f32(a, 
b).as_unsigned() } #[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcale_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + vcage_f16(b, a) +} +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcaleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + vcageq_f16(b, a) +} +#[doc = "Floating-point absolute compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2208,6 +2480,38 @@ pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { vcageq_f32(b, a) } #[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcalt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + vcagt_f16(b, a) +} +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcaltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + vcagtq_f16(b, a) +} +#[doc = "Floating-point absolute compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2254,6 +2558,38 @@ pub unsafe fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { vcagtq_f32(b, a) } #[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceq_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_eq(a, b) +} +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vceqq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_eq(a, b) +} +#[doc = "Floating-point compare equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2622,6 +2958,38 @@ pub unsafe fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { simd_eq(a, b) } #[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcge_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_ge(a, b) +} +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgeq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_ge(a, b) +} +#[doc = "Floating-point compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -2943,6 +3311,72 @@ pub unsafe fn vcge_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { pub unsafe fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_ge(a, b) } +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgez_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_ge(a, transmute(b)) +} +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgezq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_ge(a, transmute(b)) +} +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_gt(a, b) +} +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_gt(a, b) +} #[doc = "Floating-point compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f32)"] #[doc = "## Safety"] @@ -3265,6 +3699,72 @@ pub unsafe fn vcgt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { pub unsafe fn vcgtq_u32(a: 
uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_gt(a, b) } +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgtz_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_gt(a, transmute(b)) +} +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcgtzq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_gt(a, transmute(b)) +} +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcle_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_le(a, b) +} +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_le(a, b) +} #[doc = "Floating-point compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f32)"] #[doc = "## Safety"] @@ -3587,6 +4087,40 @@ pub unsafe fn vcle_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { pub unsafe fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_le(a, b) } +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcle.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmle) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclez_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_le(a, transmute(b)) +} +#[doc = "Floating-point compare less 
than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcle.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmle) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclezq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_le(a, transmute(b)) +} #[doc = "Count leading sign bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s8)"] #[doc = "## Safety"] @@ -3912,6 +4446,38 @@ pub unsafe fn vclsq_u32(a: uint32x4_t) -> int32x4_t { vclsq_s32(transmute(a)) } #[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vclt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t { + simd_lt(a, b) +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vcgt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t { + simd_lt(a, b) +} +#[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -4233,6 +4799,40 @@ pub unsafe fn vclt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { pub unsafe fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_lt(a, b) } +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmlt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcltz_f16(a: float16x4_t) -> uint16x4_t { + let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0); + simd_lt(a, transmute(b)) +} +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclt.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmlt) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = 
"136306")] +pub unsafe fn vcltzq_f16(a: float16x8_t) -> uint16x8_t { + let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); + simd_lt(a, transmute(b)) +} #[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s8)"] #[doc = "## Safety"] @@ -4993,7 +5593,19 @@ pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] ) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) +} +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5012,7 +5624,7 @@ pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { pub unsafe fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { simd_shuffle!(a, b, [0, 1, 2, 3]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5031,7 +5643,7 @@ pub unsafe fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { pub unsafe fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5050,7 +5662,7 @@ pub unsafe fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { pub unsafe fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5069,7 +5681,7 @@ pub unsafe fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { pub unsafe fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { simd_shuffle!(a, b, [0, 1, 2, 3]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5088,7 +5700,7 @@ pub unsafe fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { pub unsafe fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { simd_shuffle!(a, b, [0, 1]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5107,7 +5719,7 @@ pub unsafe fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { pub unsafe fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger 
vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5126,7 +5738,7 @@ pub unsafe fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { pub unsafe fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5145,7 +5757,7 @@ pub unsafe fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { pub unsafe fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { simd_shuffle!(a, b, [0, 1, 2, 3]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5164,7 +5776,7 @@ pub unsafe fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { pub unsafe fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { simd_shuffle!(a, b, [0, 1]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5183,7 +5795,7 @@ pub unsafe fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { pub unsafe fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5202,7 +5814,7 @@ pub unsafe fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { pub unsafe fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } -#[doc = "Vector combine"] +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5222,6 +5834,41 @@ pub unsafe fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { simd_shuffle!(a, b, [0, 1]) } #[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcreate_f16(a: u64) -> float16x4_t { + transmute(a) +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcreate_f16(a: u64) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5731,6 +6378,102 @@ pub unsafe fn vcreate_p16(a: u64) -> poly16x4_t { pub unsafe fn vcreate_p64(a: u64) -> poly64x1_t { transmute(a) } +#[doc = "Floating-point convert to lower precision narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +# [cfg_attr (all (test , target_arch = "arm") , assert_instr (vcvt . f16 . f32))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_f16_f32(a: float32x4_t) -> float16x4_t { + simd_cast(a) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_f16_s16(a: int16x4_t) -> float16x4_t { + simd_cast(a) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_f16_s16(a: int16x8_t) -> float16x8_t { + simd_cast(a) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_f16_u16(a: uint16x4_t) -> float16x4_t { + simd_cast(a) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_f16_u16(a: uint16x8_t) -> float16x8_t { + simd_cast(a) +} +#[doc = "Floating-point convert to higher precision long"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtl) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_f32_f16(a: float16x4_t) -> float32x4_t { + simd_cast(a) +} #[doc = "Fixed-point convert to floating-point"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_s32)"] #[doc = "## Safety"] @@ -5824,6 +6567,122 @@ pub unsafe fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t { simd_cast(a) } #[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_n_f16_s16(a: int16x4_t) -> float16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f16.v4i16" + )] + fn _vcvt_n_f16_s16(a: int16x4_t, n: i32) -> float16x4_t; + } + _vcvt_n_f16_s16(a, N) +} +#[doc = "Fixed-point convert to floating-point"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_n_f16_s16(a: int16x8_t) -> float16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v8f16.v8i16" + )] + fn _vcvtq_n_f16_s16(a: int16x8_t, n: i32) -> float16x8_t; + } + _vcvtq_n_f16_s16(a, N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_n_f16_u16(a: uint16x4_t) -> float16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.vcvtfxu2fp.v4f16.v4i16" + )] + fn _vcvt_n_f16_u16(a: int16x4_t, n: i32) -> float16x4_t; + } + _vcvt_n_f16_u16(a.as_signed(), N) +} +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_n_f16_u16(a: uint16x8_t) -> float16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v8f16.v8i16" + )] + fn _vcvtq_n_f16_u16(a: int16x8_t, n: i32) -> float16x8_t; + } + _vcvtq_n_f16_u16(a.as_signed(), N) +} +#[doc = "Fixed-point convert to floating-point"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -5991,6 +6850,64 @@ pub unsafe fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { } _vcvtq_n_f32_u32(a.as_signed(), N) } +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_n_s16_f16(a: float16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i16.v4f16" + )] + fn _vcvt_n_s16_f16(a: float16x4_t, n: i32) -> int16x4_t; + } + _vcvt_n_s16_f16(a, N) +} +#[doc = "Floating-point convert to signed fixed-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_n_s16_f16(a: float16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v8i16.v8f16" + )] + fn _vcvtq_n_s16_f16(a: float16x8_t, n: i32) -> int16x8_t; + } + _vcvtq_n_s16_f16(a, N) +} #[doc = "Floating-point convert to fixed-point, rounding toward zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"] #[doc = "## Safety"] @@ -6075,6 
+6992,64 @@ pub unsafe fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { } _vcvtq_n_s32_f32(a, N) } +#[doc = "Fixed-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_n_u16_f16(a: float16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i16.v4f16" + )] + fn _vcvt_n_u16_f16(a: float16x4_t, n: i32) -> int16x4_t; + } + _vcvt_n_u16_f16(a, N).as_unsigned() +} +#[doc = "Fixed-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu, N = 1) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_n_u16_f16(a: float16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern 
"unadjusted" { + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16" + )] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v8i16.v8f16" + )] + fn _vcvtq_n_u16_f16(a: float16x8_t, n: i32) -> int16x8_t; + } + _vcvtq_n_u16_f16(a, N).as_unsigned() +} #[doc = "Floating-point convert to fixed-point, rounding toward zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"] #[doc = "## Safety"] @@ -6160,6 +7135,38 @@ pub unsafe fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t { _vcvtq_n_u32_f32(a, N).as_unsigned() } #[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_s16_f16(a: float16x4_t) -> int16x4_t { + simd_cast(a) +} +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn 
vcvtq_s16_f16(a: float16x8_t) -> int16x8_t { + simd_cast(a) +} +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s32_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -6222,6 +7229,38 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { _vcvtq_s32_f32(a) } #[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvt_u16_f16(a: float16x4_t) -> uint16x4_t { + simd_cast(a) +} +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vcvtq_u16_f16(a: float16x8_t) -> uint16x8_t { + simd_cast(a) +} +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u32_f32)"] #[doc = "## 
Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -6528,6 +7567,46 @@ pub unsafe fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4 _vdotq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdupq_lane_f16(a: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7073,6 +8152,46 @@ pub unsafe fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { a } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +} +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] +#[rustc_legacy_const_generics(1)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -7617,6 +8736,38 @@ pub unsafe fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { static_assert_uimm_bits!(N, 1); transmute::(simd_extract!(a, N as u32)) } +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdup_n_f16(a: f16) -> float16x4_t { + float16x4_t::splat(a) +} +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vdupq_n_f16(a: f16) -> float16x8_t { + float16x8_t::splat(a) +} #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] #[doc = "## Safety"] @@ -8086,6 +9237,30 @@ pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { simd_xor(a, b) } #[doc = "Extract vector from pair of vectors"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } +} +#[doc = "Extract vector from pair of vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8383,6 +9558,34 @@ pub unsafe fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_ } } #[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 
2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} +#[doc = "Extract vector from pair of vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -8935,6 +10138,48 @@ pub unsafe fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t _ => unreachable_unchecked(), } } +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f16")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f16")] + fn _vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + _vfma_f16(b, c, a) +} +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v8f16")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v8f16")] + fn _vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + _vfmaq_f16(b, c, a) +} #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"] #[doc = "## Safety"] @@ -9038,6 +10283,42 @@ pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) } #[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + let b: float16x4_t = simd_neg(b); + vfma_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmls) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + let b: float16x8_t = simd_neg(b); + vfmaq_f16(a, b, c) +} +#[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -9131,6 +10412,66 @@ pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { vfmsq_f32(a, b, vdupq_n_f32_vfp4(c)) } +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vget_high_f16(a: float16x8_t) -> float16x4_t { + simd_shuffle!(a, a, [4, 5, 6, 7]) +} +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vget_low_f16(a: float16x8_t) -> float16x4_t { + simd_shuffle!(a, a, [0, 1, 2, 3]) +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vget_lane_f16<const LANE: i32>(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + simd_extract!(a, LANE as u32) +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vgetq_lane_f16<const LANE: i32>(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + simd_extract!(a, LANE as u32) +} #[doc = "Halving add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"] #[doc = "## Safety"] @@ -9875,6 +11216,254 @@ pub unsafe fn vhsubq_u32(a: uint32x4_t, 
b: uint32x4_t) -> uint32x4_t { } _vhsubq_u32(a.as_signed(), b.as_signed()).as_unsigned() } +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t { + let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t { + let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] 
+#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + transmute(vld1_v4f16( + ptr as *const i8, + crate::mem::align_of::<f16>() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + let ret_val: float16x4_t = transmute(vld1_v4f16( + ptr as *const i8, + crate::mem::align_of::<f16>() as i32, + )); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + transmute(vld1q_v8f16( + ptr as *const i8, + crate::mem::align_of::<f16>() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + let ret_val: float16x8_t = transmute(vld1q_v8f16( + ptr as *const i8, + crate::mem::align_of::<f16>() as i32, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v4f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f16.p0f16")] + fn _vld1_f16_x2(a: *const f16) -> float16x4x2_t; + } + _vld1_f16_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v4f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f16.p0f16")] + fn _vld1_f16_x3(a: *const f16) -> float16x4x3_t; + } + _vld1_f16_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v4f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f16.p0f16")] + fn _vld1_f16_x4(a: *const f16) -> float16x4x4_t; + } + _vld1_f16_x4(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v8f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8f16.p0f16")] + fn _vld1q_f16_x2(a: *const f16) -> float16x8x2_t; + } + _vld1q_f16_x2(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v8f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8f16.p0f16")] + fn _vld1q_f16_x3(a: *const f16) -> float16x8x3_t; + } + _vld1q_f16_x3(a) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v8f16.p0f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8f16.p0f16")] + fn _vld1q_f16_x4(a: *const f16) -> float16x8x4_t; + } + _vld1q_f16_x4(a) +} #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] #[doc = "## Safety"] @@ -10546,6 +12135,42 @@ pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { } _vld1q_f32_x4(a) } +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"] +#[doc = 
"## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld1q_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"] #[doc = "## Safety"] @@ -13636,6 +15261,118 @@ unsafe fn vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t { } _vld1q_v8i16(a, b) } +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")] + fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t; + } + _vld1_v4f16(a, b) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = 
"arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")] + fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t; + } + _vld1q_v8f16(a, b) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0f16")] + fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t; + } + _vld2_dup_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0f16")] + fn 
_vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t; + } + _vld2q_dup_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4f16.p0f16" + )] + fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t; + } + _vld2_dup_f16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8f16.p0f16" + )] + fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t; + } + _vld2q_dup_f16(a as _) +} #[doc = "Load single 2-element structure and replicate to all lanes of two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] #[doc = "## Safety"] @@ -14523,6 +16260,84 @@ pub 
unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); ret_val } +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0f16")] + fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t; + } + _vld2_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0f16")] + fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t; + } + _vld2q_f16(a as _, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v4f16.p0f16" + )] + fn _vld2_f16(ptr: *const f16) -> float16x4x2_t; + } + _vld2_f16(a as _) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v8f16.p0f16" + )] + fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t; + } + _vld2q_f16(a as _) +} #[doc = "Load multiple 2-element structures to two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] #[doc = "## Safety"] @@ -14804,6 +16619,110 @@ pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { _vld2q_s32(a as _) } #[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")] + fn _vld2_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x2_t; + } + _vld2_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")] + fn _vld2q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x2_t; + } + _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0" + )] + fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16) + -> float16x4x2_t; + } + _vld2_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0" + )] + fn _vld2q_lane_f16( + a: float16x8_t, + b: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x2_t; + } + _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -15921,6 +17840,84 @@ pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); ret_val } +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0f16")] + fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t; + } + _vld3_dup_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0f16")] + fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t; + } + _vld3q_dup_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature 
= "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4f16.p0f16" + )] + fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t; + } + _vld3_dup_f16(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v8f16.p0f16" + )] + fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t; + } + _vld3q_dup_f16(a as _) +} #[doc = "Load single 3-element structure and replicate to all lanes of three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] #[doc = "## Safety"] @@ -16826,6 +18823,84 @@ pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); ret_val } +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0f16")] + fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t; + } + _vld3_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0f16")] + fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t; + } + _vld3q_f16(a as _, 2) +} +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v4f16.p0f16" + )] + fn _vld3_f16(ptr: *const f16) -> float16x4x3_t; + } + _vld3_f16(a as _) +} +#[doc = "Load single 3-element 
structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v8f16.p0f16" + )] + fn _vld3q_f16(ptr: *const f16) -> float16x8x3_t; + } + _vld3q_f16(a as _) +} #[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] #[doc = "## Safety"] @@ -17106,6 +19181,118 @@ pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { } _vld3q_s32(a as *const i8, 4) } +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")] + fn _vld3_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x3_t; + } + 
_vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")] + fn _vld3q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x3_t; + } + _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0" + )] + fn _vld3_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x3_t; + } + _vld3_lane_f16(b.0, b.1, b.2, LANE as 
i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0" + )] + fn _vld3q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x3_t; + } + _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} #[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] #[doc = "## Safety"] @@ -18295,6 +20482,84 @@ pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) - } _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0f16")] + fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + } + _vld4_dup_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0f16")] + fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + } + _vld4q_dup_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4f16.p0f16" + )] + fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t; + } + _vld4_dup_f16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8f16.p0f16" + )] + fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t; + } + _vld4q_dup_f16(a as _) +} #[doc = "Load single 4-element structure and replicate to all lanes of four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] #[doc = "## Safety"] @@ -19218,6 +21483,84 @@ pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); ret_val } +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0f16")] + fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + } + _vld4_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0f16")] + fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + } + _vld4q_f16(a as _, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4f16.p0f16" + )] + fn _vld4_f16(ptr: *const f16) -> float16x4x4_t; + } + _vld4_f16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v8f16.p0f16" + )] + fn _vld4q_f16(ptr: *const f16) -> float16x8x4_t; + } + _vld4q_f16(a as _) +} #[doc = "Load multiple 4-element structures to four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] #[doc = "## Safety"] @@ -19498,6 +21841,122 @@ pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { } _vld4q_s32(a as *const i8, 4) } +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")] + fn _vld4_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x4_t; + } + _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")] + fn _vld4q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0" + )] + fn _vld4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x4_t; + } + _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(ld4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0" + )] + fn _vld4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} #[doc = "Load multiple 4-element structures to four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] #[doc = "## Safety"] @@ -20720,6 +23179,54 @@ pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { ret_val } #[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v4f16" + )] + fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vmax_f16(a, b) +} +#[doc = "Maximum (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v8f16" + )] + fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vmaxq_f16(a, b) +} +#[doc = "Maximum (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21154,6 +23661,54 @@ pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { _vmaxq_u32(a.as_signed(), b.as_signed()).as_unsigned() } #[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vmaxnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f16" + )] + fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vmaxnm_f16(a, b) +} +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v8f16" + )] + fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vmaxnmq_f16(a, b) +} +#[doc = "Floating-point Maximum Number (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21216,6 +23771,54 @@ pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { _vmaxnmq_f32(a, b) } #[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(fmin) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v4f16" + )] + fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vmin_f16(a, b) +} +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v8f16" + )] + fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vminq_f16(a, b) +} +#[doc = "Minimum (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -21650,6 +24253,54 @@ pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { _vminq_u32(a.as_signed(), b.as_signed()).as_unsigned() } #[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f16" + )] + fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vminnm_f16(a, b) +} +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v8f16" + )] + fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vminnmq_f16(a, b) +} +#[doc = "Floating-point Minimum Number (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic 
unsafe"] @@ -25209,6 +27860,70 @@ pub unsafe fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x } _vmmlaq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmov_n_f16(a: f16) -> float16x4_t { + vdup_n_f16(a) +} +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmovq_n_f16(a: f16) -> float16x8_t { + vdupq_n_f16(a) +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", 
issue = "136306")] +pub unsafe fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_mul(a, b) +} +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_mul(a, b) +} #[doc = "Multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"] #[doc = "## Safety"] @@ -25255,6 +27970,61 @@ pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { simd_mul(a, b) } +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmul_lane_f16(a: float16x4_t, v: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul( + a, + simd_shuffle!(v, v, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} +#[doc = "Multiply"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul( + a, + simd_shuffle!( + v, + v, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"] #[doc = "## Safety"] @@ -25850,6 +28620,38 @@ pub unsafe fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> ) } #[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t { + simd_mul(a, vdup_n_f16(b)) +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { + simd_mul(a, vdupq_n_f16(b)) +} +#[doc = "Vector multiply by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -26939,6 +29741,38 @@ pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { _vmull_u32(a.as_signed(), b.as_signed()).as_unsigned() } #[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fneg) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vneg_f16(a: float16x4_t) -> float16x4_t { + simd_neg(a) +} +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fneg) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn 
vnegq_f16(a: float16x8_t) -> float16x8_t { + simd_neg(a) +} +#[doc = "Negate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -27875,6 +30709,30 @@ pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { x } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(faddp) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v4f16" + )] + fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vpadd_f16(a, b) +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -34623,344 +37481,2244 @@ pub unsafe fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { } _vraddhn_s64(a, b) } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[doc = "## Safety"] +#[doc = " * 
Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + transmute(vraddhn_s16(transmute(a), transmute(b))) +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vraddhn_s16(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[doc = "## Safety"] +#[doc = " 
* Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + transmute(vraddhn_s32(transmute(a), transmute(b))) +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = transmute(vraddhn_s32(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + transmute(vraddhn_s64(transmute(a), transmute(b))) +} +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(raddhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = transmute(vraddhn_s64(transmute(a), transmute(b))); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable 
= "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpe_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f16" + )] + fn _vrecpe_f16(a: float16x4_t) -> float16x4_t; + } + _vrecpe_f16(a) +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v8f16" + )] + fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t; + } + _vrecpeq_f16(a) +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v2f32" + )] + fn _vrecpe_f32(a: float32x2_t) -> float32x2_t; + } + _vrecpe_f32(a) +} +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f32" + )] + fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t; + } + _vrecpeq_f32(a) +} +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v2i32" + )] + fn _vrecpe_u32(a: int32x2_t) -> int32x2_t; + } + _vrecpe_u32(a.as_signed()).as_unsigned() +} +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v4i32" + )] + fn _vrecpeq_u32(a: int32x4_t) -> int32x4_t; + } + _vrecpeq_u32(a.as_signed()).as_unsigned() +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f16" + )] + fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vrecps_f16(a, b) +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v8f16" + )] + fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vrecpsq_f16(a, b) +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v2f32" + )] + fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + _vrecps_f32(a, b) +} +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f32" + )] + fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + _vrecpsq_f32(a, b) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = 
"little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast 
operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { + transmute(a) +} 
+#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) +} +#[doc = "Vector reinterpret cast 
operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { + let a: float16x8_t = 
simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint32x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { + transmute(a) +} 
+#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch 
= "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
+#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + let a: int8x16_t = 
simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret 
cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "## Safety"] +#[doc = 
" * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u8(a: uint8x8_t) -> 
float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { + let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector 
reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { + let ret_val: 
float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(nop) )] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) )] -pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - transmute(vraddhn_s16(transmute(a), transmute(b))) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + transmute(a) } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(nop) )] -pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vraddhn_s16(transmute(a), transmute(b))); +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } 
-#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(nop) )] -pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - transmute(vraddhn_s32(transmute(a), transmute(b))) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { + transmute(a) } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
#[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(nop) )] -pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(vraddhn_s32(transmute(a), transmute(b))); +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = transmute(a); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(nop) )] -pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t 
{ - transmute(vraddhn_s64(transmute(a), transmute(b))) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + transmute(a) } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(nop) )] -pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); - let ret_val: uint32x2_t = transmute(vraddhn_s64(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [1, 0]) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(nop) )] -pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v2f32" - )] - fn _vrecpe_f32(a: float32x2_t) -> float32x2_t; - } - _vrecpe_f32(a) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { + transmute(a) } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(nop) )] -pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v4f32" - )] - fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t; - } - _vrecpeq_f32(a) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } -#[doc = "Unsigned reciprocal estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), - assert_instr(urecpe) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(nop) )] -pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urecpe.v2i32" - )] - fn _vrecpe_u32(a: int32x2_t) -> int32x2_t; - } - _vrecpe_u32(a.as_signed()).as_unsigned() +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + transmute(a) } -#[doc = "Unsigned reciprocal estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urecpe) + assert_instr(nop) )] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) )] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) )] -pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urecpe.v4i32" - )] - fn _vrecpeq_u32(a: int32x4_t) -> int32x4_t; - } - _vrecpeq_u32(a.as_signed()).as_unsigned() +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + transmute(a) } -#[doc = "Floating-point 
reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(nop) )] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) )] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly64x2_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) )] -pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v2f32" - )] - fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - _vrecps_f32(a, b) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { + transmute(a) } -#[doc = "Floating-point reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"] +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(nop) )] +#[target_feature(enable = 
"neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { + let ret_val: float16x4_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) )] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + transmute(a) +} +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) )] -pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v4f32" - )] - fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - 
_vrecpsq_f32(a, b) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float16x8_t = transmute(a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] @@ -48776,6 +53534,38 @@ pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { let ret_val: poly16x8_t = transmute(a); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrev64_f16(a: float16x4_t) -> float16x4_t { + simd_shuffle!(a, a, [3, 2, 1, 0]) +} +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rev64) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrev64q_f16(a: 
float16x8_t) -> float16x8_t { + simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) +} #[doc = "Rounding halving add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"] #[doc = "## Safety"] @@ -49149,6 +53939,54 @@ pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { _vrhaddq_u32(a.as_signed(), b.as_signed()).as_unsigned() } #[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frintn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrndn_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frintn.v4f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f16")] + fn _vrndn_f16(a: float16x4_t) -> float16x4_t; + } + _vrndn_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frintn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn 
vrndnq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frintn.v8f16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v8f16")] + fn _vrndnq_f16(a: float16x8_t) -> float16x8_t; + } + _vrndnq_f16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -50310,6 +55148,54 @@ pub unsafe fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { transmute(vrshrn_n_s64::(transmute(a))) } #[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrte_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f16" + )] + fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t; + } + _vrsqrte_f16(a) +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v8f16" + )] + fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t; + } + _vrsqrteq_f16(a) +} +#[doc = "Reciprocal square-root estimate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -50434,6 +55320,54 @@ pub unsafe fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t { _vrsqrteq_u32(a.as_signed()).as_unsigned() } #[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f16" + )] + fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + _vrsqrts_f16(a, b) +} +#[doc = "Floating-point reciprocal square root step"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v8f16" + )] + fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + _vrsqrtsq_f16(a, b) +} +#[doc = "Floating-point reciprocal square root step"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -51142,6 +56076,42 @@ pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { simd_shuffle!(ret_val, ret_val, [1, 0]) } #[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vset_lane_f16(a: f16, b: float16x4_t) -> 
float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(b, LANE as u32, a) +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(b, LANE as u32, a) +} +#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -54836,6 +59806,280 @@ pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x )) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { + vst1_v4f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + vst1q_v8f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f16.v4f16")] + fn _vst1_f16_x2(ptr: *mut f16, a: float16x4_t, b: float16x4_t); + } + _vst1_f16_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f16.v8f16")] + fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t); + } + _vst1q_f16_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4f16.p0f16" + )] + fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16); + } + _vst1_f16_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v8f16.p0f16" + )] + fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16); + } + _vst1q_f16_x2(b.0, b.1, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
+#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f16.v4f16")] + fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t); + } + _vst1_f16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f16.v8f16")] + fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: float16x8_t); + } + _vst1q_f16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.st1x3.v4f16.p0f16" + )] + fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16); + } + _vst1_f16_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8f16.p0f16" + )] + fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16); + } + _vst1q_f16_x3(b.0, b.1, b.2, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f16.v4f16")] + fn _vst1_f16_x4( + ptr: *mut f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ); + } + _vst1_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f16.v8f16")] + fn _vst1q_f16_x4( + ptr: *mut f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ); + } + _vst1q_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4f16.p0f16" + )] + fn _vst1_f16_x4( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ptr: *mut f16, + ); + } + _vst1_f16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = 
"stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8f16.p0f16" + )] + fn _vst1q_f16_x4( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ptr: *mut f16, + ); + } + _vst1q_f16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -55286,38 +60530,6 @@ pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.v2f32.p0")] - fn _vst1_f32_x3(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t); - } - _vst1_f32_x3(a, b.0, b.1, b.2) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.v4f32.p0")] - fn _vst1q_f32_x3(ptr: *mut f32, a: float32x4_t, b: 
float32x4_t, c: float32x4_t); - } - _vst1q_f32_x3(a, b.0, b.1, b.2) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr(test, assert_instr(st1))] @@ -55446,6 +60658,42 @@ pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { _vst1q_f32_x4(b.0, b.1, b.2, b.3, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1_lane_f16(a: *mut f16, b: float16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] 
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst1q_lane_f16(a: *mut f16, b: float16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -57996,6 +63244,40 @@ unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32) { } _vst1q_v8i16(addr, val, align) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")] + fn _vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32); + } + _vst1_v4f16(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1q_v8f16(addr: 
*const i8, val: float16x8_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")] + fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32); + } + _vst1q_v8f16(addr, val, align) +} #[doc = "Store multiple single-element structures from one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"] #[doc = "## Safety"] @@ -58022,6 +63304,78 @@ pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { *a = simd_extract!(b, LANE as u32); } #[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4f16.p0i8" + )] + fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8); + } + _vst2_f16(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8f16.p0i8" + )] + fn 
_vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8); + } + _vst2q_f16(b.0, b.1, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4f16")] + fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32); + } + _vst2_f16(a as _, b.0, b.1, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8f16")] + fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32); + } + _vst2q_f16(a as _, b.0, b.1, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -58302,6 +63656,86 @@ pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { _vst2q_s32(a as _, b.0, b.1, 4) } #[doc = "Store 
multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4f16.p0i8" + )] + fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8f16.p0i8" + )] + fn _vst2q_lane_f16(a: float16x8_t, b: float16x8_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4f16")] + fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32); + } + _vst2_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8f16")] + fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32); + } + _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] @@ -59086,6 +64520,78 @@ pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { vst2q_s16(transmute(a), transmute(b)) } #[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4f16")] + fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32); + } + _vst3_f16(a as _, b.0, b.1, b.2, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v8f16")] + fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: i32); + } + _vst3q_f16(a as _, b.0, b.1, b.2, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern 
"unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4f16.p0i8" + )] + fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8); + } + _vst3_f16(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8f16.p0i8" + )] + fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8); + } + _vst3q_f16(b.0, b.1, b.2, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -59366,6 +64872,100 @@ pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { _vst3q_s32(b.0, b.1, b.2, a as _) } #[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { + 
static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4f16")] + fn _vst3_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ); + } + _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v8f16")] + fn _vst3q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4f16.p0i8" + )] + fn 
_vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8); + } + _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8f16.p0i8" + )] + fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -60192,6 +65792,92 @@ pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { vst3q_s16(transmute(a), transmute(b)) } #[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vst4.p0i8.v4f16")] + fn _vst4_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + size: i32, + ); + } + _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v8f16")] + fn _vst4q_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + size: i32, + ); + } + _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v4f16.p0i8" + )] + fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8); + } + _vst4_f16(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v8f16.p0i8" + )] + fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8); + } + _vst4q_f16(b.0, b.1, b.2, b.3, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -60521,6 +66207,116 @@ pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { _vst4q_s32(b.0, b.1, b.2, b.3, a as _) } #[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4f16")] + fn _vst4_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ); + } + _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 
2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8f16")] + fn _vst4q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ); + } + _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4f16.p0i8" + )] + fn _vst4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8f16.p0i8" + )] + fn _vst4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -61410,6 +67206,38 @@ pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) { vst4q_s16(transmute(a), transmute(b)) } #[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + simd_sub(a, b) +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + simd_sub(a, b) +} +#[doc = "Subtract"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -63384,6 +69212,42 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } #[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} +#[doc = "Transpose elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -64413,6 +70277,42 @@ pub unsafe fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4 _vusmmlaq_s32(a, b.as_signed(), c) } #[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] 
+#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} +#[doc = "Unzip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] @@ -64887,6 +70787,42 @@ pub unsafe fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { transmute((a0, b0)) } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn 
vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 623d39c398..cf24be9a74 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -52,62 +52,62 @@ types! { #![cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] #![cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] - /// ARM-specific 64-bit wide vector of eight packed `i8`. + /// Arm-specific 64-bit wide vector of eight packed `i8`. pub struct int8x8_t(8 x pub(crate) i8); - /// ARM-specific 64-bit wide vector of eight packed `u8`. + /// Arm-specific 64-bit wide vector of eight packed `u8`. pub struct uint8x8_t(8 x pub(crate) u8); - /// ARM-specific 64-bit wide polynomial vector of eight packed `p8`. + /// Arm-specific 64-bit wide polynomial vector of eight packed `p8`. pub struct poly8x8_t(8 x pub(crate) p8); - /// ARM-specific 64-bit wide vector of four packed `i16`. + /// Arm-specific 64-bit wide vector of four packed `i16`. pub struct int16x4_t(4 x pub(crate) i16); - /// ARM-specific 64-bit wide vector of four packed `u16`. + /// Arm-specific 64-bit wide vector of four packed `u16`. pub struct uint16x4_t(4 x pub(crate) u16); - // FIXME: ARM-specific 64-bit wide vector of four packed `f16`. - // pub struct float16x4_t(f16, f16, f16, f16); - /// ARM-specific 64-bit wide vector of four packed `p16`. + // Arm-specific 64-bit wide vector of four packed `f16`. 
+ pub struct float16x4_t(4 x pub(crate) f16); + /// Arm-specific 64-bit wide vector of four packed `p16`. pub struct poly16x4_t(4 x pub(crate) p16); - /// ARM-specific 64-bit wide vector of two packed `i32`. + /// Arm-specific 64-bit wide vector of two packed `i32`. pub struct int32x2_t(2 x pub(crate) i32); - /// ARM-specific 64-bit wide vector of two packed `u32`. + /// Arm-specific 64-bit wide vector of two packed `u32`. pub struct uint32x2_t(2 x pub(crate) u32); - /// ARM-specific 64-bit wide vector of two packed `f32`. + /// Arm-specific 64-bit wide vector of two packed `f32`. pub struct float32x2_t(2 x pub(crate) f32); - /// ARM-specific 64-bit wide vector of one packed `i64`. + /// Arm-specific 64-bit wide vector of one packed `i64`. pub struct int64x1_t(1 x pub(crate) i64); - /// ARM-specific 64-bit wide vector of one packed `u64`. + /// Arm-specific 64-bit wide vector of one packed `u64`. pub struct uint64x1_t(1 x pub(crate) u64); - /// ARM-specific 64-bit wide vector of one packed `p64`. + /// Arm-specific 64-bit wide vector of one packed `p64`. pub struct poly64x1_t(1 x pub(crate) p64); - /// ARM-specific 128-bit wide vector of sixteen packed `i8`. + /// Arm-specific 128-bit wide vector of sixteen packed `i8`. pub struct int8x16_t(16 x pub(crate) i8); - /// ARM-specific 128-bit wide vector of sixteen packed `u8`. + /// Arm-specific 128-bit wide vector of sixteen packed `u8`. pub struct uint8x16_t(16 x pub(crate) u8); - /// ARM-specific 128-bit wide vector of sixteen packed `p8`. + /// Arm-specific 128-bit wide vector of sixteen packed `p8`. pub struct poly8x16_t(16 x pub(crate) p8); - /// ARM-specific 128-bit wide vector of eight packed `i16`. + /// Arm-specific 128-bit wide vector of eight packed `i16`. pub struct int16x8_t(8 x pub(crate) i16); - /// ARM-specific 128-bit wide vector of eight packed `u16`. + /// Arm-specific 128-bit wide vector of eight packed `u16`. 
pub struct uint16x8_t(8 x pub(crate) u16); - // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`. - // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16); - /// ARM-specific 128-bit wide vector of eight packed `p16`. + // Arm-specific 128-bit wide vector of eight packed `f16`. + pub struct float16x8_t(8 x pub(crate) f16); + /// Arm-specific 128-bit wide vector of eight packed `p16`. pub struct poly16x8_t(8 x pub(crate) p16); - /// ARM-specific 128-bit wide vector of four packed `i32`. + /// Arm-specific 128-bit wide vector of four packed `i32`. pub struct int32x4_t(4 x pub(crate) i32); - /// ARM-specific 128-bit wide vector of four packed `u32`. + /// Arm-specific 128-bit wide vector of four packed `u32`. pub struct uint32x4_t(4 x pub(crate) u32); - /// ARM-specific 128-bit wide vector of four packed `f32`. + /// Arm-specific 128-bit wide vector of four packed `f32`. pub struct float32x4_t(4 x pub(crate) f32); - /// ARM-specific 128-bit wide vector of two packed `i64`. + /// Arm-specific 128-bit wide vector of two packed `i64`. pub struct int64x2_t(2 x pub(crate) i64); - /// ARM-specific 128-bit wide vector of two packed `u64`. + /// Arm-specific 128-bit wide vector of two packed `u64`. pub struct uint64x2_t(2 x pub(crate) u64); - /// ARM-specific 128-bit wide vector of two packed `p64`. + /// Arm-specific 128-bit wide vector of two packed `p64`. pub struct poly64x2_t(2 x pub(crate) p64); } -/// ARM-specific type containing two `int8x8_t` vectors. +/// Arm-specific type containing two `int8x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -119,7 +119,7 @@ types! { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int8x8x2_t(pub int8x8_t, pub int8x8_t); -/// ARM-specific type containing three `int8x8_t` vectors. +/// Arm-specific type containing three `int8x8_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -131,7 +131,7 @@ pub struct int8x8x2_t(pub int8x8_t, pub int8x8_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t); -/// ARM-specific type containing four `int8x8_t` vectors. +/// Arm-specific type containing four `int8x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -144,7 +144,7 @@ pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t); )] pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t); -/// ARM-specific type containing two `int8x16_t` vectors. +/// Arm-specific type containing two `int8x16_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -156,7 +156,7 @@ pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t); -/// ARM-specific type containing three `int8x16_t` vectors. +/// Arm-specific type containing three `int8x16_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -168,7 +168,7 @@ pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t); -/// ARM-specific type containing four `int8x16_t` vectors. +/// Arm-specific type containing four `int8x16_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -181,7 +181,7 @@ pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t); )] pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t); -/// ARM-specific type containing two `uint8x8_t` vectors. +/// Arm-specific type containing two `uint8x8_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -193,7 +193,7 @@ pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t); -/// ARM-specific type containing three `uint8x8_t` vectors. +/// Arm-specific type containing three `uint8x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -205,7 +205,7 @@ pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); -/// ARM-specific type containing four `uint8x8_t` vectors. +/// Arm-specific type containing four `uint8x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -218,7 +218,7 @@ pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); )] pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); -/// ARM-specific type containing two `uint8x16_t` vectors. +/// Arm-specific type containing two `uint8x16_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -230,7 +230,7 @@ pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t); -/// ARM-specific type containing three `uint8x16_t` vectors. +/// Arm-specific type containing three `uint8x16_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -242,7 +242,7 @@ pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t); -/// ARM-specific type containing four `uint8x16_t` vectors. +/// Arm-specific type containing four `uint8x16_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -260,7 +260,7 @@ pub struct uint8x16x4_t( pub uint8x16_t, ); -/// ARM-specific type containing two `poly8x8_t` vectors. +/// Arm-specific type containing two `poly8x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -272,7 +272,7 @@ pub struct uint8x16x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t); -/// ARM-specific type containing three `poly8x8_t` vectors. +/// Arm-specific type containing three `poly8x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -284,7 +284,7 @@ pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); -/// ARM-specific type containing four `poly8x8_t` vectors. +/// Arm-specific type containing four `poly8x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -297,7 +297,7 @@ pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); )] pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); -/// ARM-specific type containing two `poly8x16_t` vectors. +/// Arm-specific type containing two `poly8x16_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -309,7 +309,7 @@ pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t); -/// ARM-specific type containing three `poly8x16_t` vectors. +/// Arm-specific type containing three `poly8x16_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -321,7 +321,7 @@ pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t); -/// ARM-specific type containing four `poly8x16_t` vectors. +/// Arm-specific type containing four `poly8x16_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -339,7 +339,7 @@ pub struct poly8x16x4_t( pub poly8x16_t, ); -/// ARM-specific type containing two `int16x4_t` vectors. +/// Arm-specific type containing two `int16x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -351,7 +351,7 @@ pub struct poly8x16x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t); -/// ARM-specific type containing three `int16x4_t` vectors. +/// Arm-specific type containing three `int16x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -363,7 +363,7 @@ pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int16x4x3_t(pub int16x4_t, pub int16x4_t, pub int16x4_t); -/// ARM-specific type containing four `int16x4_t` vectors. +/// Arm-specific type containing four `int16x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -376,7 +376,7 @@ pub struct int16x4x3_t(pub int16x4_t, pub int16x4_t, pub int16x4_t); )] pub struct int16x4x4_t(pub int16x4_t, pub int16x4_t, pub int16x4_t, pub int16x4_t); -/// ARM-specific type containing two `int16x8_t` vectors. +/// Arm-specific type containing two `int16x8_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -388,7 +388,7 @@ pub struct int16x4x4_t(pub int16x4_t, pub int16x4_t, pub int16x4_t, pub int16x4_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int16x8x2_t(pub int16x8_t, pub int16x8_t); -/// ARM-specific type containing three `int16x8_t` vectors. +/// Arm-specific type containing three `int16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -400,7 +400,7 @@ pub struct int16x8x2_t(pub int16x8_t, pub int16x8_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int16x8x3_t(pub int16x8_t, pub int16x8_t, pub int16x8_t); -/// ARM-specific type containing four `int16x8_t` vectors. +/// Arm-specific type containing four `int16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -413,7 +413,7 @@ pub struct int16x8x3_t(pub int16x8_t, pub int16x8_t, pub int16x8_t); )] pub struct int16x8x4_t(pub int16x8_t, pub int16x8_t, pub int16x8_t, pub int16x8_t); -/// ARM-specific type containing two `uint16x4_t` vectors. +/// Arm-specific type containing two `uint16x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -425,7 +425,7 @@ pub struct int16x8x4_t(pub int16x8_t, pub int16x8_t, pub int16x8_t, pub int16x8_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint16x4x2_t(pub uint16x4_t, pub uint16x4_t); -/// ARM-specific type containing three `uint16x4_t` vectors. +/// Arm-specific type containing three `uint16x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -437,7 +437,7 @@ pub struct uint16x4x2_t(pub uint16x4_t, pub uint16x4_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint16x4x3_t(pub uint16x4_t, pub uint16x4_t, pub uint16x4_t); -/// ARM-specific type containing four `uint16x4_t` vectors. +/// Arm-specific type containing four `uint16x4_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -455,7 +455,7 @@ pub struct uint16x4x4_t( pub uint16x4_t, ); -/// ARM-specific type containing two `uint16x8_t` vectors. +/// Arm-specific type containing two `uint16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -467,7 +467,7 @@ pub struct uint16x4x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint16x8x2_t(pub uint16x8_t, pub uint16x8_t); -/// ARM-specific type containing three `uint16x8_t` vectors. +/// Arm-specific type containing three `uint16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -479,7 +479,7 @@ pub struct uint16x8x2_t(pub uint16x8_t, pub uint16x8_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint16x8x3_t(pub uint16x8_t, pub uint16x8_t, pub uint16x8_t); -/// ARM-specific type containing four `uint16x8_t` vectors. +/// Arm-specific type containing four `uint16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -497,7 +497,7 @@ pub struct uint16x8x4_t( pub uint16x8_t, ); -/// ARM-specific type containing two `poly16x4_t` vectors. +/// Arm-specific type containing two `poly16x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -509,7 +509,7 @@ pub struct uint16x8x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly16x4x2_t(pub poly16x4_t, pub poly16x4_t); -/// ARM-specific type containing three `poly16x4_t` vectors. +/// Arm-specific type containing three `poly16x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -521,7 +521,7 @@ pub struct poly16x4x2_t(pub poly16x4_t, pub poly16x4_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly16x4x3_t(pub poly16x4_t, pub poly16x4_t, pub poly16x4_t); -/// ARM-specific type containing four `poly16x4_t` vectors. +/// Arm-specific type containing four `poly16x4_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -539,7 +539,7 @@ pub struct poly16x4x4_t( pub poly16x4_t, ); -/// ARM-specific type containing two `poly16x8_t` vectors. +/// Arm-specific type containing two `poly16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -551,7 +551,7 @@ pub struct poly16x4x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly16x8x2_t(pub poly16x8_t, pub poly16x8_t); -/// ARM-specific type containing three `poly16x8_t` vectors. +/// Arm-specific type containing three `poly16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -563,7 +563,7 @@ pub struct poly16x8x2_t(pub poly16x8_t, pub poly16x8_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly16x8x3_t(pub poly16x8_t, pub poly16x8_t, pub poly16x8_t); -/// ARM-specific type containing four `poly16x8_t` vectors. +/// Arm-specific type containing four `poly16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -581,7 +581,7 @@ pub struct poly16x8x4_t( pub poly16x8_t, ); -/// ARM-specific type containing two `int32x2_t` vectors. +/// Arm-specific type containing two `int32x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -593,7 +593,7 @@ pub struct poly16x8x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int32x2x2_t(pub int32x2_t, pub int32x2_t); -/// ARM-specific type containing three `int32x2_t` vectors. +/// Arm-specific type containing three `int32x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -605,7 +605,7 @@ pub struct int32x2x2_t(pub int32x2_t, pub int32x2_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int32x2x3_t(pub int32x2_t, pub int32x2_t, pub int32x2_t); -/// ARM-specific type containing four `int32x2_t` vectors. +/// Arm-specific type containing four `int32x2_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -618,7 +618,7 @@ pub struct int32x2x3_t(pub int32x2_t, pub int32x2_t, pub int32x2_t); )] pub struct int32x2x4_t(pub int32x2_t, pub int32x2_t, pub int32x2_t, pub int32x2_t); -/// ARM-specific type containing two `int32x4_t` vectors. +/// Arm-specific type containing two `int32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -630,7 +630,7 @@ pub struct int32x2x4_t(pub int32x2_t, pub int32x2_t, pub int32x2_t, pub int32x2_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int32x4x2_t(pub int32x4_t, pub int32x4_t); -/// ARM-specific type containing three `int32x4_t` vectors. +/// Arm-specific type containing three `int32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -642,7 +642,7 @@ pub struct int32x4x2_t(pub int32x4_t, pub int32x4_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int32x4x3_t(pub int32x4_t, pub int32x4_t, pub int32x4_t); -/// ARM-specific type containing four `int32x4_t` vectors. +/// Arm-specific type containing four `int32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -655,7 +655,7 @@ pub struct int32x4x3_t(pub int32x4_t, pub int32x4_t, pub int32x4_t); )] pub struct int32x4x4_t(pub int32x4_t, pub int32x4_t, pub int32x4_t, pub int32x4_t); -/// ARM-specific type containing two `uint32x2_t` vectors. +/// Arm-specific type containing two `uint32x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -667,7 +667,7 @@ pub struct int32x4x4_t(pub int32x4_t, pub int32x4_t, pub int32x4_t, pub int32x4_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint32x2x2_t(pub uint32x2_t, pub uint32x2_t); -/// ARM-specific type containing three `uint32x2_t` vectors. +/// Arm-specific type containing three `uint32x2_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -679,7 +679,7 @@ pub struct uint32x2x2_t(pub uint32x2_t, pub uint32x2_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint32x2x3_t(pub uint32x2_t, pub uint32x2_t, pub uint32x2_t); -/// ARM-specific type containing four `uint32x2_t` vectors. +/// Arm-specific type containing four `uint32x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -697,7 +697,7 @@ pub struct uint32x2x4_t( pub uint32x2_t, ); -/// ARM-specific type containing two `uint32x4_t` vectors. +/// Arm-specific type containing two `uint32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -709,7 +709,7 @@ pub struct uint32x2x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint32x4x2_t(pub uint32x4_t, pub uint32x4_t); -/// ARM-specific type containing three `uint32x4_t` vectors. +/// Arm-specific type containing three `uint32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -721,7 +721,7 @@ pub struct uint32x4x2_t(pub uint32x4_t, pub uint32x4_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint32x4x3_t(pub uint32x4_t, pub uint32x4_t, pub uint32x4_t); -/// ARM-specific type containing four `uint32x4_t` vectors. +/// Arm-specific type containing four `uint32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -739,7 +739,53 @@ pub struct uint32x4x4_t( pub uint32x4_t, ); -/// ARM-specific type containing two `float32x2_t` vectors. +/// Arm-specific type containing two `float16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x4x2_t(pub float16x4_t, pub float16x4_t); + +/// Arm-specific type containing three `float16x4_t` vectors. 
+#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x4x3_t(pub float16x4_t, pub float16x4_t, pub float16x4_t); + +/// Arm-specific type containing four `float16x4_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x4x4_t( + pub float16x4_t, + pub float16x4_t, + pub float16x4_t, + pub float16x4_t, +); + +/// Arm-specific type containing two `float16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x8x2_t(pub float16x8_t, pub float16x8_t); + +/// Arm-specific type containing three `float16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] + +pub struct float16x8x3_t(pub float16x8_t, pub float16x8_t, pub float16x8_t); +/// Arm-specific type containing four `float16x8_t` vectors. +#[repr(C)] +#[derive(Copy, Clone, Debug)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub struct float16x8x4_t( + pub float16x8_t, + pub float16x8_t, + pub float16x8_t, + pub float16x8_t, +); + +/// Arm-specific type containing two `float32x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -751,7 +797,7 @@ pub struct uint32x4x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct float32x2x2_t(pub float32x2_t, pub float32x2_t); -/// ARM-specific type containing three `float32x2_t` vectors. +/// Arm-specific type containing three `float32x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -763,7 +809,7 @@ pub struct float32x2x2_t(pub float32x2_t, pub float32x2_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct float32x2x3_t(pub float32x2_t, pub float32x2_t, pub float32x2_t); -/// ARM-specific type containing four `float32x2_t` vectors. 
+/// Arm-specific type containing four `float32x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -781,7 +827,7 @@ pub struct float32x2x4_t( pub float32x2_t, ); -/// ARM-specific type containing two `float32x4_t` vectors. +/// Arm-specific type containing two `float32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -793,7 +839,7 @@ pub struct float32x2x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct float32x4x2_t(pub float32x4_t, pub float32x4_t); -/// ARM-specific type containing three `float32x4_t` vectors. +/// Arm-specific type containing three `float32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -805,7 +851,7 @@ pub struct float32x4x2_t(pub float32x4_t, pub float32x4_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct float32x4x3_t(pub float32x4_t, pub float32x4_t, pub float32x4_t); -/// ARM-specific type containing four `float32x4_t` vectors. +/// Arm-specific type containing four `float32x4_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -823,7 +869,7 @@ pub struct float32x4x4_t( pub float32x4_t, ); -/// ARM-specific type containing two `int64x1_t` vectors. +/// Arm-specific type containing two `int64x1_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -835,7 +881,7 @@ pub struct float32x4x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int64x1x2_t(pub int64x1_t, pub int64x1_t); -/// ARM-specific type containing three `int64x1_t` vectors. +/// Arm-specific type containing three `int64x1_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -847,7 +893,7 @@ pub struct int64x1x2_t(pub int64x1_t, pub int64x1_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int64x1x3_t(pub int64x1_t, pub int64x1_t, pub int64x1_t); -/// ARM-specific type containing four `int64x1_t` vectors. 
+/// Arm-specific type containing four `int64x1_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -860,7 +906,7 @@ pub struct int64x1x3_t(pub int64x1_t, pub int64x1_t, pub int64x1_t); )] pub struct int64x1x4_t(pub int64x1_t, pub int64x1_t, pub int64x1_t, pub int64x1_t); -/// ARM-specific type containing two `int64x2_t` vectors. +/// Arm-specific type containing two `int64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -872,7 +918,7 @@ pub struct int64x1x4_t(pub int64x1_t, pub int64x1_t, pub int64x1_t, pub int64x1_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int64x2x2_t(pub int64x2_t, pub int64x2_t); -/// ARM-specific type containing three `int64x2_t` vectors. +/// Arm-specific type containing three `int64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -884,7 +930,7 @@ pub struct int64x2x2_t(pub int64x2_t, pub int64x2_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct int64x2x3_t(pub int64x2_t, pub int64x2_t, pub int64x2_t); -/// ARM-specific type containing four `int64x2_t` vectors. +/// Arm-specific type containing four `int64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -897,7 +943,7 @@ pub struct int64x2x3_t(pub int64x2_t, pub int64x2_t, pub int64x2_t); )] pub struct int64x2x4_t(pub int64x2_t, pub int64x2_t, pub int64x2_t, pub int64x2_t); -/// ARM-specific type containing two `uint64x1_t` vectors. +/// Arm-specific type containing two `uint64x1_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -909,7 +955,7 @@ pub struct int64x2x4_t(pub int64x2_t, pub int64x2_t, pub int64x2_t, pub int64x2_ unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint64x1x2_t(pub uint64x1_t, pub uint64x1_t); -/// ARM-specific type containing three `uint64x1_t` vectors. +/// Arm-specific type containing three `uint64x1_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -921,7 +967,7 @@ pub struct uint64x1x2_t(pub uint64x1_t, pub uint64x1_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint64x1x3_t(pub uint64x1_t, pub uint64x1_t, pub uint64x1_t); -/// ARM-specific type containing four `uint64x1_t` vectors. +/// Arm-specific type containing four `uint64x1_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -939,7 +985,7 @@ pub struct uint64x1x4_t( pub uint64x1_t, ); -/// ARM-specific type containing two `uint64x2_t` vectors. +/// Arm-specific type containing two `uint64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -951,7 +997,7 @@ pub struct uint64x1x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint64x2x2_t(pub uint64x2_t, pub uint64x2_t); -/// ARM-specific type containing three `uint64x2_t` vectors. +/// Arm-specific type containing three `uint64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -963,7 +1009,7 @@ pub struct uint64x2x2_t(pub uint64x2_t, pub uint64x2_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct uint64x2x3_t(pub uint64x2_t, pub uint64x2_t, pub uint64x2_t); -/// ARM-specific type containing four `uint64x2_t` vectors. +/// Arm-specific type containing four `uint64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -981,7 +1027,7 @@ pub struct uint64x2x4_t( pub uint64x2_t, ); -/// ARM-specific type containing two `poly64x1_t` vectors. +/// Arm-specific type containing two `poly64x1_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -993,7 +1039,7 @@ pub struct uint64x2x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly64x1x2_t(pub poly64x1_t, pub poly64x1_t); -/// ARM-specific type containing three `poly64x1_t` vectors. +/// Arm-specific type containing three `poly64x1_t` vectors. 
#[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -1005,7 +1051,7 @@ pub struct poly64x1x2_t(pub poly64x1_t, pub poly64x1_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly64x1x3_t(pub poly64x1_t, pub poly64x1_t, pub poly64x1_t); -/// ARM-specific type containing four `poly64x1_t` vectors. +/// Arm-specific type containing four `poly64x1_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -1023,7 +1069,7 @@ pub struct poly64x1x4_t( pub poly64x1_t, ); -/// ARM-specific type containing two `poly64x2_t` vectors. +/// Arm-specific type containing two `poly64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -1035,7 +1081,7 @@ pub struct poly64x1x4_t( unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly64x2x2_t(pub poly64x2_t, pub poly64x2_t); -/// ARM-specific type containing three `poly64x2_t` vectors. +/// Arm-specific type containing three `poly64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -1047,7 +1093,7 @@ pub struct poly64x2x2_t(pub poly64x2_t, pub poly64x2_t); unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub struct poly64x2x3_t(pub poly64x2_t, pub poly64x2_t, pub poly64x2_t); -/// ARM-specific type containing four `poly64x2_t` vectors. +/// Arm-specific type containing four `poly64x2_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)] #[cfg_attr( @@ -4737,6 +4783,24 @@ pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_ simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) } +/// Bitwise Select. 
+#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + let not = int16x4_t::splat(-1); + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) +} + /// Bitwise Select. #[inline] #[target_feature(enable = "neon")] @@ -5050,6 +5114,24 @@ pub unsafe fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8 )) } +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(bsl) +)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +pub unsafe fn vbslq_f16(a: uint16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + let not = int16x8_t::splat(-1); + transmute(simd_or( + simd_and(a, transmute(b)), + simd_and(simd_xor(a, transmute(not)), transmute(c)), + )) +} + /// Bitwise Select. 
(128-bit) #[inline] #[target_feature(enable = "neon")] diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index a97d45c3bd..29e4b3e7a2 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -199,6 +199,8 @@ simd_ty!( simd_ty!(i32x4[i32;4]: x0, x1, x2, x3); simd_ty!(i64x2[i64;2]: x0, x1); +simd_ty!(f16x4[f16;4]: x0, x1, x2, x3); + simd_ty!( f16x8[f16;8]: x0, diff --git a/crates/intrinsic-test/missing_aarch64.txt b/crates/intrinsic-test/missing_aarch64.txt index 4b3fa09a67..ff13fcf4fc 100644 --- a/crates/intrinsic-test/missing_aarch64.txt +++ b/crates/intrinsic-test/missing_aarch64.txt @@ -23,4 +23,10 @@ vrnd64zq_f64 vrnd32x_f64 vrnd32z_f64 vrnd64x_f64 -vrnd64z_f64 \ No newline at end of file +vrnd64z_f64 + +# Broken in Clang +vcvth_s16_f16 +# FIXME: Broken output due to missing f16 printing support in Rust, see git blame for this line +vmulh_lane_f16 +vmulh_laneq_f16 diff --git a/crates/intrinsic-test/missing_arm.txt b/crates/intrinsic-test/missing_arm.txt index 5fc97acb8a..04c09a27d9 100644 --- a/crates/intrinsic-test/missing_arm.txt +++ b/crates/intrinsic-test/missing_arm.txt @@ -108,6 +108,74 @@ vsri_n_p64 vsriq_n_p64 vtst_p64 vtstq_p64 +vaddh_f16 +vsubh_f16 +vabsh_f16 +vdivh_f16 +vmulh_f16 +vfmsh_f16 +vfmah_f16 +vminnmh_f16 +vmaxnmh_f16 +vrndh_f16 +vrndnh_f16 +vrndih_f16 +vrndah_f16 +vrndph_f16 +vrndmh_f16 +vrndxh_f16 +vsqrth_f16 +vnegh_f16 +vcvth_f16_s32 +vcvth_s32_f16 +vcvth_n_f16_s32 +vcvth_n_s32_f16 +vcvth_f16_u32 +vcvth_u32_f16 +vcvth_n_f16_u32 +vcvth_n_u32_f16 +vcvtah_s32_f16 +vcvtah_u32_f16 +vcvtmh_s32_f16 +vcvtmh_u32_f16 +vcvtpq_s16_f16 +vcvtpq_u16_f16 +vcvtp_s16_f16 +vcvtp_u16_f16 +vcvtph_s32_f16 +vcvtph_u32_f16 +vcvtnh_u32_f16 +vcvtnh_s32_f16 +vfmlsl_low_f16 +vfmlslq_low_f16 +vfmlsl_high_f16 +vfmlslq_high_f16 +vfmlsl_lane_high_f16 +vfmlsl_laneq_high_f16 +vfmlslq_lane_high_f16 +vfmlslq_laneq_high_f16 +vfmlsl_lane_low_f16 +vfmlsl_laneq_low_f16 +vfmlslq_lane_low_f16 +vfmlslq_laneq_low_f16 +vfmlal_low_f16 
+vfmlalq_low_f16 +vfmlal_high_f16 +vfmlalq_high_f16 +vfmlal_lane_low_f16 +vfmlal_laneq_low_f16 +vfmlalq_lane_low_f16 +vfmlalq_laneq_low_f16 +vfmlal_lane_high_f16 +vfmlal_laneq_high_f16 +vfmlalq_lane_high_f16 +vfmlalq_laneq_high_f16 +vreinterpret_f16_p64 +vreinterpretq_f16_p64 +vreinterpret_p64_f16 +vreinterpretq_p64_f16 +vreinterpret_p128_f16 +vreinterpretq_p128_f16 # Present in Clang header but triggers an ICE due to lack of backend support. vcmla_f32 @@ -134,6 +202,31 @@ vcmlaq_rot270_laneq_f32 vcmlaq_rot90_f32 vcmlaq_rot90_lane_f32 vcmlaq_rot90_laneq_f32 +vcmla_f16 +vcmlaq_f16 +vcmla_laneq_f16 +vcmla_lane_f16 +vcmla_laneq_f16 +vcmlaq_lane_f16 +vcmlaq_laneq_f16 +vcmla_rot90_f16 +vcmlaq_rot90_f16 +vcmla_rot180_f16 +vcmlaq_rot180_f16 +vcmla_rot270_f16 +vcmlaq_rot270_f16 +vcmla_rot90_lane_f16 +vcmla_rot90_laneq_f16 +vcmlaq_rot90_lane_f16 +vcmlaq_rot90_laneq_f16 +vcmla_rot180_lane_f16 +vcmla_rot180_laneq_f16 +vcmlaq_rot180_lane_f16 +vcmlaq_rot180_laneq_f16 +vcmla_rot270_lane_f16 +vcmla_rot270_laneq_f16 +vcmlaq_rot270_lane_f16 +vcmlaq_rot270_laneq_f16 # Implemented in stdarch for A64 only, Clang support both A32/A64 vadd_s64 @@ -182,4 +275,46 @@ vrndpq_f32 vrndq_f32 vrndq_f32 vrndx_f32 -vrndxq_f32 \ No newline at end of file +vrndxq_f32 +vrnda_f16 +vrnda_f16 +vrndaq_f16 +vrndaq_f16 +vrnd_f16 +vrnd_f16 +vrndi_f16 +vrndi_f16 +vrndiq_f16 +vrndiq_f16 +vrndm_f16 +vrndm_f16 +vrndmq_f16 +vrndmq_f16 +vrndns_f16 +vrndp_f16 +vrndpq_f16 +vrndq_f16 +vrndx_f16 +vrndxq_f16 +vpmin_f16 +vpmax_f16 +vcaddq_rot270_f16 +vcaddq_rot90_f16 +vcadd_rot270_f16 +vcadd_rot90_f16 +vcvtm_s16_f16 +vcvtmq_s16_f16 +vcvtm_u16_f16 +vcvtmq_u16_f16 +vcvtaq_s16_f16 +vcvtaq_u16_f16 +vcvtnq_s16_f16 +vcvtnq_u16_f16 +vcvtn_s16_f16 +vcvtn_u16_f16 +vcvtaq_s16_f16 +vcvtaq_u16_f16 +vcvta_s16_f16 +vcvta_u16_f16 +vceqz_f16 +vceqzq_f16 diff --git a/crates/intrinsic-test/src/intrinsic.rs b/crates/intrinsic-test/src/intrinsic.rs index b5c1071777..b96edf1852 100644 --- a/crates/intrinsic-test/src/intrinsic.rs +++ 
b/crates/intrinsic-test/src/intrinsic.rs @@ -62,6 +62,7 @@ impl Intrinsic { format!( "{promote}cast<{cast}>(__return_value)", cast = match self.results.kind() { + TypeKind::Float if self.results.inner_size() == 16 => "float16_t".to_string(), TypeKind::Float if self.results.inner_size() == 32 => "float".to_string(), TypeKind::Float if self.results.inner_size() == 64 => "double".to_string(), TypeKind::Int => format!("int{}_t", self.results.inner_size()), diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index d9577db3c6..a64e264d8b 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -114,6 +114,15 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {{ }} #endif +std::ostream& operator<<(std::ostream& os, float16_t value) {{ + uint16_t temp = 0; + memcpy(&temp, &value, sizeof(float16_t)); + std::stringstream ss; + ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp; + os << ss.str(); + return os; +}} + {arglists} int main(int argc, char **argv) {{ @@ -185,6 +194,7 @@ fn generate_rust_program(notices: &str, intrinsic: &Intrinsic, target: &str) -> format!( r#"{notices}#![feature(simd_ffi)] #![feature(link_llvm_intrinsics)] +#![feature(f16)] #![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] #![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] @@ -193,6 +203,7 @@ fn generate_rust_program(notices: &str, intrinsic: &Intrinsic, target: &str) -> #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] +#![feature(stdarch_neon_f16)] #![allow(non_upper_case_globals)] use core_arch::arch::{target_arch}::*; @@ -227,9 +238,9 @@ fn 
compile_c( ) -> bool { let flags = std::env::var("CPPFLAGS").unwrap_or("".into()); let arch_flags = if target.contains("v7") { - "-march=armv8.6-a+crypto+crc+dotprod" + "-march=armv8.6-a+crypto+crc+dotprod+fp16" } else { - "-march=armv8.6-a+crypto+sha3+crc+dotprod" + "-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16" }; let intrinsic_name = &intrinsic.name; @@ -324,7 +335,12 @@ fn build_c( let c_filename = format!(r#"c_programs/{}.cpp"#, i.name); let mut file = File::create(&c_filename).unwrap(); - let c_code = generate_c_program(notices, &["arm_neon.h", "arm_acle.h"], i, target); + let c_code = generate_c_program( + notices, + &["arm_neon.h", "arm_acle.h", "arm_fp16.h"], + i, + target, + ); file.write_all(c_code.into_bytes().as_slice()).unwrap(); match compiler { None => true, @@ -512,13 +528,7 @@ fn main() { // Not sure how we would compare intrinsic that returns void. .filter(|i| i.results.kind() != TypeKind::Void) .filter(|i| i.results.kind() != TypeKind::BFloat) - .filter(|i| !(i.results.kind() == TypeKind::Float && i.results.inner_size() == 16)) .filter(|i| !i.arguments.iter().any(|a| a.ty.kind() == TypeKind::BFloat)) - .filter(|i| { - !i.arguments - .iter() - .any(|a| a.ty.kind() == TypeKind::Float && a.ty.inner_size() == 16) - }) // Skip pointers for now, we would probably need to look at the return // type to work out how many elements we need to point to. .filter(|i| !i.arguments.iter().any(|a| a.is_ptr())) diff --git a/crates/intrinsic-test/src/types.rs b/crates/intrinsic-test/src/types.rs index 1eb44896f7..d5bf7c8c64 100644 --- a/crates/intrinsic-test/src/types.rs +++ b/crates/intrinsic-test/src/types.rs @@ -348,14 +348,16 @@ impl IntrinsicType { } IntrinsicType::Type { kind: TypeKind::Float, - bit_len: Some(bit_len @ (32 | 64)), + bit_len: Some(bit_len @ (16 | 32 | 64)), simd_len, vec_len, .. 
} => { let (prefix, cast_prefix, cast_suffix, suffix) = match (language, bit_len) { + (&Language::Rust, 16) => ("[", "f16::from_bits(", ")", "]"), (&Language::Rust, 32) => ("[", "f32::from_bits(", ")", "]"), (&Language::Rust, 64) => ("[", "f64::from_bits(", ")", "]"), + (&Language::C, 16) => ("{", "cast(", ")", "}"), (&Language::C, 32) => ("{", "cast(", ")", "}"), (&Language::C, 64) => ("{", "cast(", ")", "}"), _ => unreachable!(), diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 3ce3e4fcb4..f3924b0f94 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -5,7 +5,7 @@ arch_cfgs: # Generate big endian shuffles auto_big_endian: true -# Repeatedly used anchors +# Repeatedly used anchors # #[stable(feature = "neon_intrinsics", since = "1.59.0")] neon-stable: &neon-stable FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] @@ -60,6 +60,17 @@ neon-aes: &neon-aes neon-i8mm: &neon-i8mm FnCall: [target_feature, ['enable = "neon,i8mm"']] +# #[target_feature(enable = "neon,fp16")] +neon-fp16: &neon-fp16 + FnCall: [target_feature, ['enable = "neon,fp16"']] + +# #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +enable-fhm: &enable-fhm + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fhm"']] }]] + +enable-fcma: &enable-fcma + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fcma"']] }]] + #[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] neon-unstable-i8mm: &neon-unstable-i8mm FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']] }, { FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] } ]] @@ -71,6 +82,10 @@ neon-unstable-fcma: &neon-unstable-fcma aarch64-crc-stable: &aarch64-crc-stable FnCall: [stable, ['feature = 
"stdarch_aarch64_crc32"', 'since = "1.80.0"']] +# #[unstable(feature = "stdarch_neon_f16", issue = "136306")] +neon-unstable-f16: &neon-unstable-f16 + FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] + intrinsics: - name: "vaddd_{type}" doc: Add @@ -171,6 +186,27 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - 0 + - name: "vabd{type[0]}" + doc: "Floating-point absolute difference" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fabd] + safety: + unsafe: [neon] + types: + - ['h_f16', 'f16'] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vabd_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - 0 + - name: "vabdl_high{neon_type[0].noq}" doc: Signed Absolute difference Long arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -354,6 +390,28 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - '0' + + - name: "vceq{type[0]}" + doc: "Floating-point compare equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vceq_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + - name: "vceqd_{type[0]}" doc: "Compare bitwise equal" arguments: ["a: {type[0]}", "b: {type[0]}"] @@ -534,6 +592,28 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - '0' + + - name: "vcgt{type[0]}" + doc: "Floating-point compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - 'simd_extract!' 
+ - - FnCall: + - "vcgt_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + - name: "vclt{neon_type[0].no}" doc: "Compare signed less than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -600,6 +680,28 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - '0' + + - name: "vcle{type[0]}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcle_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + - name: "vcge{neon_type[0].no}" doc: "Compare signed greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -721,6 +823,26 @@ intrinsics: - - FnCall: ["vdup_n_{type[1]}", [a]] - '0' + - name: "vclez{type[0]}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vclez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + - name: "vcltz{neon_type[0].no}" doc: "Compare signed less than zero" arguments: ["a: {neon_type[0]}"] @@ -787,6 +909,26 @@ intrinsics: - - FnCall: ["vdup_n_{type[1]}", [a]] - '0' + - name: "vcltz{type[0]}" + doc: "Floating-point compare less than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vcltz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + - name: "vcltzd_s64" doc: "Compare less than zero" arguments: ["a: {type[0]}"] @@ -843,6 +985,27 @@ intrinsics: - link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}" arch: aarch64,arm64ec + - name: "vcagt{type[0]}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16", i32] + compose: + - LLVMLink: + name: "vcagt{type[0]}" + return_type: "{type[3]}" + links: + - link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + - '_vcagth_f16(a, b).as_unsigned() as u16' + - name: "vcage{neon_type[0].no}" doc: "Floating-point absolute compare greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -881,6 +1044,28 @@ intrinsics: - link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}" arch: aarch64,arm64ec + + - name: "vcage{type[0]}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16", i32] + compose: + - LLVMLink: + name: "vcage{type[0]}" + return_type: "{type[3]}" + links: + - link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + - "_vcageh_f16(a, b).as_unsigned() as u16" + - name: "vcalt{neon_type[0].no}" doc: "Floating-point absolute compare less than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -911,6 +1096,21 @@ intrinsics: compose: - FnCall: ["vcagt{type[0]}", [b, a]] + - name: "vcalt{type[0]}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: 
"{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: ["vcagt{type[0]}", [b, a]] + - name: "vcale{neon_type[0].no}" doc: "Floating-point absolute compare less than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -941,6 +1141,21 @@ intrinsics: compose: - FnCall: ["vcage{type[0]}", [b, a]] + - name: "vcale{type[0]}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: ["vcage{type[0]}", [b, a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -1027,6 +1242,119 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [i32, f16, 'h'] + - [i64, f16, 'h'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxs2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: 
{type[0]}"] + return_type: "{type[4]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [f16, s16, 'h', i32, i16] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[3]}_{type[0]}::<N>(a) as i16" + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [u32, f16, 'h'] + - [u64, f16, 'h'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a.as_signed(), N]] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [i16, f16, 'h', 'i32', 'as i32'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[1]}_{type[3]}::<N>(a {type[4]}) as {type[1]}" + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - 
FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [u16, f16, 'h', u32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[1]}_{type[3]}::<N>(a as {type[3]}) as {type[1]}" + + - name: "vcvt{type[2]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {type[0]}"] @@ -1053,6 +1381,7 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vcvt{type[2]}", [a, N]] + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -1135,13 +1464,84 @@ intrinsics: compose: - Identifier: ["a as {type[1]}", Symbol] - - name: "vcvt_f64_f32" - doc: "Floating-point convert to higher precision long" - arguments: ["a: {neon_type[0]}"] - return_type: "{neon_type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[3]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["s16", "f16", "h_f16_s16", i16] + - ["s32", "f16", "h_f16_s32", i32] + - ["s64", "f16", "h_f16_s64", i64] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + - name: "vcvt{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to signed fixed-point" + arguments: ["a: {type[0]}"] + return_type: "{type[3]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "s16", "h", i16, 'a as i16'] + - ["f16", "s32", "h", i32, 'a as i32'] + - ["f16", "s64", "h", i64, 'a as i64'] + compose: + - Identifier: ["{type[4]}", Symbol] + + - name: "vcvt{type[2]}_{type[1]}_{type[0]}" + doc: 
"Floating-point convert to unsigned fixed-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", "h", 'a as u16'] + - ["f16", "u32", "h", 'a as u32'] + - ["f16", "u64", "h", 'a as u64'] + compose: + - Identifier: ["{type[3]}", Symbol] + + + - name: "vcvt{type[2]}" + doc: "Unsigned fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["u16", "f16", "h_f16_u16"] + - ["u32", "f16", "h_f16_u32"] + - ["u64", "f16", "h_f16_u64"] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + + - name: "vcvt_f64_f32" + doc: "Floating-point convert to higher precision long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] safety: unsafe: [neon] types: @@ -1171,8 +1571,44 @@ intrinsics: - '[2, 3]' - FnCall: [simd_cast, [b]] + - name: "vcvt_high_f16_f32" + doc: "Floating-point convert to lower precision" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x8_t, float16x4_t, float32x4_t] + compose: + - FnCall: + - vcombine_f16 + - - a + - FnCall: [vcvt_f16_f32, [b]] + + - name: "vcvt_high_f32_f16" + doc: "Floating-point convert to higher precision" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: 
+ - [float32x4_t, float16x8_t] + compose: + - FnCall: + - vcvt_f32_f16 + - - FnCall: [vget_high_f16, [a]] + + - name: "vcvt_f32_f64" - doc: "Floating-point convert to lower precision narrow" + doc: "Floating-point convert" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: @@ -1306,6 +1742,77 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["f16", "i32", 'h', '16'] + - ["f16", "i64", 'h', '16'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]] + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [f16, u16, 'h', u32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - "vcvt{type[2]}_n_{type[3]}_{type[0]}::<N>(a) as {type[1]}" + + + - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 
'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h', '16'] + - ["f16", "u64", 'h', '16'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]] + - name: "vcvt{type[2]}" doc: "Floating-point convert to fixed-point, rounding toward zero" arguments: ["a: {neon_type[0]}"] @@ -1375,6 +1882,27 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtas.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvta{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvta_{neon_type[1]}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtas.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - name: "vcvta{type[2]}" doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] @@ -1394,18 +1922,21 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" arch: aarch64,arm64ec + - name: "vcvta{type[2]}" doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: 
[neon] types: - - ["f32", "u32", 's_u32_f32'] - - ["f64", "u64", 'd_u64_f64'] + - ["f16", "u32", 'h_u32_f16'] + - ["f16", "u64", 'h_u64_f16'] + compose: - LLVMLink: name: "vcvta{type[2]}" @@ -1413,92 +1944,84 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtau.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" - doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" - arguments: ["a: {neon_type[0]}"] - return_type: "{neon_type[1]}" + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - [float32x2_t, int32x2_t] - - [float32x4_t, int32x4_t] - - [float64x1_t, int64x1_t] - - [float64x2_t, int64x2_t] + - ["f16", "i32", 'h_s32_f16'] + - ["f16", "i64", 'h_s64_f16'] compose: - LLVMLink: - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + name: "vcvta{type[2]}" + return_type: "{type[1]}" links: - - link: "llvm.aarch64.neon.fcvtns.{neon_type[1]}.{neon_type[0]}" + - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{type[2]}" - doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] return_type: "{type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - ["f32", "i32", 
's_s32_f32'] - - ["f64", "i64", 'd_s64_f64'] + - ["f16", "i16", 'h_s16_f16', 's32'] compose: - - LLVMLink: - name: "vcvtn{type[2]}" - links: - - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" - arch: aarch64,arm64ec + - 'vcvtah_{type[3]}_f16(a) as i16' - - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" - doc: "Floating-point convert to signed integer, rounding toward minus infinity" - arguments: ["a: {neon_type[0]}"] - return_type: "{neon_type[1]}" + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - [float32x2_t, int32x2_t] - - [float32x4_t, int32x4_t] - - [float64x1_t, int64x1_t] - - [float64x2_t, int64x2_t] + - ["f16", "u16", 'h_u16_f16', 'u32'] compose: - - LLVMLink: - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" - links: - - link: "llvm.aarch64.neon.fcvtms.{neon_type[1]}.{neon_type[0]}" - arch: aarch64,arm64ec + - 'vcvtah_{type[3]}_f16(a) as u16' - - name: "vcvtm{type[2]}" - doc: "Floating-point convert to signed integer, rounding toward minus infinity" + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] return_type: "{type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] safety: unsafe: [neon] types: - - ["f32", "i32", 's_s32_f32'] - - ["f64", "i64", 'd_s64_f64'] + - ["f32", "u32", 's_u32_f32'] + - ["f64", "u64", 'd_u64_f64'] compose: - LLVMLink: - name: "vcvtm{type[2]}" + name: "vcvta{type[2]}" links: - - link: 
"llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" + - link: "llvm.aarch64.neon.fcvtau.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" - doc: "Floating-point convert to signed integer, rounding toward plus infinity" + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] safety: unsafe: [neon] @@ -1509,17 +2032,17 @@ intrinsics: - [float64x2_t, int64x2_t] compose: - LLVMLink: - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" links: - - link: "llvm.aarch64.neon.fcvtps.{neon_type[1]}.{neon_type[0]}" + - link: "llvm.aarch64.neon.fcvtns.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec - - name: "vcvtp{type[2]}" - doc: "Floating-point convert to signed integer, rounding toward plus infinity" + - name: "vcvtn{type[2]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" arguments: ["a: {type[0]}"] return_type: "{type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] safety: unsafe: [neon] @@ -1528,47 +2051,285 @@ intrinsics: - ["f64", "i64", 'd_s64_f64'] compose: - LLVMLink: - name: "vcvtp{type[2]}" + name: "vcvtn{type[2]}" links: - - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" + - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" arch: aarch64,arm64ec + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" - doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + doc: "Floating-point 
convert to signed integer, rounding to nearest with ties to even" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - [float32x2_t, uint32x2_t] - - [float32x4_t, uint32x4_t] - - [float64x1_t, uint64x1_t] - - [float64x2_t, uint64x2_t] + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] compose: - LLVMLink: name: "vcvtn{neon_type[1].no}_{neon_type[0]}" links: - - link: "llvm.aarch64.neon.fcvtnu.{neon_type[1]}.{neon_type[0]}" + - link: "llvm.aarch64.neon.fcvtns.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{type[2]}" + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - ["f32", "u32", 's_u32_f32'] - - ["f64", "u64", 'd_u64_f64'] + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] compose: - LLVMLink: - name: "vcvtn{type[2]}" + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i32", 
'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 'vcvtnh_{type[3]}_f16(a) as i16' + + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtnh_{type[3]}_f16(a) as u16' + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: 
+ unsafe: [neon] + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtm{type[2]}" + doc: "Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}" + links: + - link: 
"llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding toward plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}" + doc: "Floating-point convert to signed integer, rounding toward plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - 
name: "vcvtn{type[2]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "u32", 's_u32_f32'] + - ["f64", "u64", 'd_u64_f64'] + compose: + - LLVMLink: + name: "vcvtn{type[2]}" links: - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" arch: aarch64,arm64ec @@ -1656,6 +2417,121 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtpu.{type[3]}.{type[0]}" arch: aarch64,arm64ec + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: 
"{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i32", 'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 'vcvtph_{type[3]}_f16(a) as i16' + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtph_{type[3]}_f16(a) as u16' + - name: "vdup{neon_type.laneq_nox}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type}"] @@ -1793,10 +2669,49 @@ intrinsics: - FnCall: [static_assert_uimm_bits!, [N, 3]] - FnCall: 
[simd_extract!, [a, 'N as u32']] + - name: "vdup{type[2]}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type[0]}"] return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, "f16", h_lane_f16] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 2]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + + - name: "vdup{type[2]}" + doc: "Extract an element from a vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x8_t, "f16", h_laneq_f16] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 4]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + + - name: "vdup{type[2]}" + doc: "Extract an element from a vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 8']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] @@ -2008,6 +2923,22 @@ intrinsics: compose: - MethodCall: [a, wrapping_neg, []] + + - name: "vnegh_{type}" + doc: Negate + arguments: ["a: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fneg] + safety: + unsafe: [neon] + types: + - f16 + compose: + - '-a' + - name: "vneg{neon_type.no}" doc: Negate arguments: ["a: {neon_type}"] @@ -2225,12 +3156,232 @@ intrinsics: - link: "llvm.rint.{neon_type}" arch: aarch64,arm64ec - - name: "vrnda{neon_type.no}" - doc: "Floating-point round to integral, to nearest with ties to away" + + - name: "vrndx{neon_type.no}" + doc: "Floating-point round to integral exact, using current rounding mode" + 
arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintx] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.rint.{neon_type}" + links: + - link: "llvm.rint.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndx{type[1]}{type[0]}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintx] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.rint.{type[0]}" + links: + - link: "llvm.rint.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vrnda{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frinta] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.round.{neon_type}" + links: + - link: "llvm.round.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrnda{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frinta] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.round.{neon_type}" + links: + - link: "llvm.round.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrnda{type[1]}{type[0]}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frinta] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.round.{type[0]}" + links: + - link: "llvm.round.{type[0]}" 
+ arch: aarch64,arm64ec + + - name: "vrndn{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintn] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "frintn.{neon_type}" + links: + - link: "llvm.aarch64.neon.frintn.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrndns_{type}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [frintn] + safety: + unsafe: [neon] + types: + - f32 + compose: + - LLVMLink: + name: "roundeven.{type}" + links: + - link: "llvm.roundeven.{type}" + arch: aarch64,arm64ec + + - name: "vrndn{type[1]}{type[0]}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintn] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.roundeven.{type[0]}" + links: + - link: "llvm.roundeven.{type[0]}" + arch: aarch64,arm64ec + + - name: "vrndm{neon_type.no}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintm] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.floor.{neon_type}" + links: + - link: "llvm.floor.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndm{neon_type.no}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintm] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: 
"llvm.floor.{neon_type}" + links: + - link: "llvm.floor.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vrndm{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintm] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.floor.{type[0]}" + links: + - link: "llvm.floor.{type[0]}" + arch: aarch64,arm64ec + + + + - name: "vrndp{neon_type.no}" + doc: "Floating-point round to integral, toward plus infinity" arguments: ["a: {neon_type}"] return_type: "{neon_type}" attr: [*neon-stable] - assert_instr: [frinta] + assert_instr: [frintp] safety: unsafe: [neon] types: @@ -2240,52 +3391,57 @@ intrinsics: - float64x2_t compose: - LLVMLink: - name: "llvm.round.{neon_type}" + name: "llvm.ceil.{neon_type}" links: - - link: "llvm.round.{neon_type}" + - link: "llvm.ceil.{neon_type}" arch: aarch64,arm64ec - - name: "vrndn{neon_type.no}" - doc: "Floating-point round to integral, to nearest with ties to even" + + - name: "vrndp{neon_type.no}" + doc: "Floating-point round to integral, toward plus infinity" arguments: ["a: {neon_type}"] return_type: "{neon_type}" - attr: [*neon-stable] - assert_instr: [frintn] + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintp] safety: unsafe: [neon] types: - - float64x1_t - - float64x2_t + - float16x4_t + - float16x8_t compose: - LLVMLink: - name: "frintn.{neon_type}" + name: "llvm.ceil.{neon_type}" links: - - link: "llvm.aarch64.neon.frintn.{neon_type}" + - link: "llvm.ceil.{neon_type}" arch: aarch64,arm64ec - - name: "vrndns_{type}" - doc: "Floating-point round to integral, to nearest with ties to even" - arguments: ["a: {type}"] - return_type: "{type}" - attr: [*neon-stable] - assert_instr: [frintn] + - name: "vrndp{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward plus infinity" + arguments: ["a: {type[0]}"] + 
return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintp] safety: unsafe: [neon] types: - - f32 + - [f16, 'h_'] compose: - LLVMLink: - name: "roundeven.{type}" + name: "llvm.ceil.{type[0]}" links: - - link: "llvm.roundeven.{type}" + - link: "llvm.ceil.{type[0]}" arch: aarch64,arm64ec - - name: "vrndm{neon_type.no}" - doc: "Floating-point round to integral, toward minus infinity" + - name: "vrnd{neon_type.no}" + doc: "Floating-point round to integral, toward zero" arguments: ["a: {neon_type}"] return_type: "{neon_type}" attr: [*neon-stable] - assert_instr: [frintm] + assert_instr: [frintz] safety: unsafe: [neon] types: @@ -2295,37 +3451,58 @@ intrinsics: - float64x2_t compose: - LLVMLink: - name: "llvm.floor.{neon_type}" + name: "llvm.trunc.{neon_type}" links: - - link: "llvm.floor.{neon_type}" + - link: "llvm.trunc.{neon_type}" arch: aarch64,arm64ec - - name: "vrndp{neon_type.no}" - doc: "Floating-point round to integral, toward plus infinity" + - name: "vrnd{neon_type.no}" + doc: "Floating-point round to integral, toward zero" arguments: ["a: {neon_type}"] return_type: "{neon_type}" - attr: [*neon-stable] - assert_instr: [frintp] + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintz] safety: unsafe: [neon] types: - - float32x2_t - - float32x4_t - - float64x1_t - - float64x2_t + - float16x4_t + - float16x8_t compose: - LLVMLink: - name: "llvm.ceil.{neon_type}" + name: "llvm.trunc.{neon_type}" links: - - link: "llvm.ceil.{neon_type}" + - link: "llvm.trunc.{neon_type}" arch: aarch64,arm64ec - - name: "vrnd{neon_type.no}" - doc: "Floating-point round to integral, toward zero" + + - name: "vrnd{type[1]}{type[0]}" + doc: "Floating-point round to integral, toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [frintz] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.trunc.{type[0]}" 
+ links: + - link: "llvm.trunc.{type[0]}" + arch: aarch64,arm64ec + + + - name: "vrndi{neon_type.no}" + doc: "Floating-point round to integral, using current rounding mode" arguments: ["a: {neon_type}"] return_type: "{neon_type}" attr: [*neon-stable] - assert_instr: [frintz] + assert_instr: [frinti] safety: unsafe: [neon] types: @@ -2335,24 +3512,25 @@ intrinsics: - float64x2_t compose: - LLVMLink: - name: "llvm.trunc.{neon_type}" + name: "llvm.nearbyint.{neon_type}" links: - - link: "llvm.trunc.{neon_type}" + - link: "llvm.nearbyint.{neon_type}" arch: aarch64,arm64ec + - name: "vrndi{neon_type.no}" doc: "Floating-point round to integral, using current rounding mode" arguments: ["a: {neon_type}"] return_type: "{neon_type}" - attr: [*neon-stable] + attr: + - *neon-fp16 + - *neon-unstable-f16 assert_instr: [frinti] safety: unsafe: [neon] types: - - float32x2_t - - float32x4_t - - float64x1_t - - float64x2_t + - float16x4_t + - float16x8_t compose: - LLVMLink: name: "llvm.nearbyint.{neon_type}" @@ -2360,6 +3538,27 @@ intrinsics: - link: "llvm.nearbyint.{neon_type}" arch: aarch64,arm64ec + + - name: "vrndi{type[1]}{type[0]}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + # TODO: double check me + assert_instr: [frinti] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.nearbyint.{type[0]}" + links: + - link: "llvm.nearbyint.{type[0]}" + arch: aarch64,arm64ec + - name: "vqadd{type[1]}" doc: Saturating add arguments: ["a: {type[0]}", "b: {type[0]}"] @@ -4265,6 +5464,28 @@ intrinsics: - link: "llvm.aarch64.neon.fmulx.{neon_type}" arch: aarch64,arm64ec + + - name: "vmulx{neon_type.no}" + doc: Floating-point multiply extended + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmulx] + safety: + unsafe: [neon] + types: 
+ - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmulx.{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fmulx.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmulx{type[0]}" doc: Floating-point multiply extended arguments: ["a: {type[1]}", "b: {type[1]}"] @@ -4283,6 +5504,27 @@ intrinsics: - link: "llvm.aarch64.neon.fmulx.{type[1]}" arch: aarch64,arm64ec + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmulx] + safety: + unsafe: [neon] + types: + - ["h_f16", "f16"] + compose: + - LLVMLink: + name: "fmulx.{type[1]}" + links: + - link: "llvm.aarch64.neon.fmulx.{type[1]}" + arch: aarch64,arm64ec + + - name: "vmulx_lane_f64" doc: Floating-point multiply extended arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -4408,6 +5650,36 @@ intrinsics: - b - "{type[5]}" + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - 
"vmulx{type[4]}" + - - a + - FnCall: + - "simd_shuffle!" + - - b + - b + - "{type[5]}" + + - name: "vmulx{type[0]}" doc: Floating-point multiply extended arguments: ["a: {type[1]}", "b: {neon_type[2]}"] return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] @@ -4433,6 +5705,52 @@ intrinsics: - - b - "{type[5]}" + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['h_lane_f16', f16, float16x4_t, '2', 'h_f16', "LANE as u32"] + - ['h_laneq_f16', f16, float16x8_t, '3', 'h_f16', "LANE as u32"] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_extract!" + - - b + - "{type[5]}" + + + - name: "vmulx{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, "f16"] + - [float16x8_t, "f16"] + compose: + - FnCall: + - vmulx{neon_type[0].no} + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + - name: "vfma{neon_type.no}" doc: Floating-point fused Multiply-Add to accumulator(vector) arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -4516,6 +5834,28 @@ intrinsics: - "vdupq_n_f64" - - c + - name: "vfma{neon_type[0].N}" + doc: Floating-point fused Multiply-Add to accumulator. 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmla] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: + - "vdup{neon_type[0].N}" + - - c + - name: "vdiv{neon_type.no}" doc: "Divide" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -4532,6 +5872,37 @@ intrinsics: compose: - FnCall: [simd_div, [a, b]] + - name: "vdiv{neon_type.no}" + doc: "Divide" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fdiv] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_div, [a, b]] + + - name: "vdiv{type[1]}_{type[0]}" + doc: Divide + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [f16, 'h'] + compose: + - 'a / b' + - name: "vsub{neon_type.no}" doc: "Subtract" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -4558,7 +5929,22 @@ intrinsics: - ['d_s64', 'i64'] - ['d_u64', 'u64'] compose: - - MethodCall: [a, wrapping_sub, [b]] + - MethodCall: [a, wrapping_sub, [b]] + + - name: "vsub{type[0]}" + doc: "Subtract" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['h_f16', 'f16'] + compose: + - 'a - b' - name: "vaddv{neon_type[0].no}" doc: Floating-point add across vector @@ -4835,42 +6221,331 @@ intrinsics: attr: - FnCall: [target_feature, ['enable = "neon,fcma"']] - *neon-unstable-fcma - assert_instr: [fcadd] + assert_instr: [fcadd] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: 
"llvm.aarch64.neon.vcadd.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot270}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *enable-fcma + - *neon-unstable-f16 + assert_instr: [fcadd] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vcadd.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot270.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot90}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-fp16 + - *enable-fcma + - *neon-unstable-f16 + assert_instr: [fcadd] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vcadd.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.no}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.no}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: 
"llvm.aarch64.neon.vcmla.rot0.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot90}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot90}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot270}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vcmla{neon_type.rot270}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = 
"neon,fcma"']] + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type[0].laneq_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].laneq_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - 
FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, 
"{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] safety: unsafe: [neon] types: - - float32x2_t - - float32x4_t - - float64x2_t + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] compose: - - LLVMLink: - name: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" - links: - - link: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" - arch: aarch64,arm64ec + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] - - name: "vcmla{neon_type.no}" + - name: "vcmla{neon_type[0].rot90_lane}" doc: Floating-point complex multiply accumulate - arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] - return_type: "{neon_type}" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" attr: - FnCall: [target_feature, ['enable = "neon,fcma"']] - - *neon-unstable-fcma - assert_instr: [fcmla] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] safety: unsafe: [neon] types: - - float32x2_t - - float32x4_t - - float64x2_t + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as 
u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] compose: - - LLVMLink: - name: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" - links: - - link: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" - arch: aarch64,arm64ec + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] - - name: "vcmla{neon_type.rot90}" + - name: "vcmla{neon_type.rot180}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] return_type: "{neon_type}" @@ -4886,33 +6561,35 @@ intrinsics: - float64x2_t compose: - LLVMLink: - name: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + name: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" links: - - link: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + - link: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" arch: aarch64,arm64ec - - name: "vcmla{neon_type.rot270}" + + - name: "vcmla{neon_type.rot180}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] return_type: "{neon_type}" attr: - FnCall: [target_feature, ['enable = "neon,fcma"']] - - *neon-unstable-fcma + - *neon-fp16 + - *neon-unstable-f16 assert_instr: [fcmla] safety: unsafe: [neon] types: - - float32x2_t - - float32x4_t - - float64x2_t + - float16x4_t + - float16x8_t compose: - LLVMLink: - name: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + name: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" links: - - link: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + - link: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" arch: aarch64,arm64ec - - name: "vcmla{neon_type[0].laneq_nox}" + + - name: "vcmla{neon_type[0].rot180_laneq}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" @@ 
-4933,9 +6610,9 @@ intrinsics: - c - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - - name: "vcmla{neon_type[0].rot90_laneq}" + - name: "vcmla{neon_type[0].rot180_laneq}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" @@ -4943,22 +6620,25 @@ intrinsics: - FnCall: [target_feature, ['enable = "neon,fcma"']] - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - *neon-unstable-fcma + - *neon-fp16 + - *neon-unstable-f16 static_defs: ["const LANE: i32"] safety: unsafe: [neon] types: - - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, + '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]' + ] compose: - - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] - Let: - c - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - - name: "vcmla{neon_type[0].rot90_lane}" + - name: "vcmla{type[3]}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" @@ -4971,38 +6651,43 @@ intrinsics: safety: unsafe: [neon] types: - - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 
* LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f32'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f32'] compose: - FnCall: [static_assert!, ['LANE == 0']] - Let: - c - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - - name: "vcmla{neon_type.rot180}" + - name: "vcmla{type[3]}" doc: Floating-point complex multiply accumulate - arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] - return_type: "{neon_type}" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" attr: - FnCall: [target_feature, ['enable = "neon,fcma"']] - - *neon-unstable-fcma - assert_instr: [fcmla] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] safety: unsafe: [neon] types: - - float32x2_t - - float32x4_t - - float64x2_t + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f16'] + - [float16x8_t, float16x4_t, + '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f16' + ] compose: - - LLVMLink: - name: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" - links: - - link: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" - arch: aarch64,arm64ec + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - - name: "vcmla{neon_type[0].rot180_laneq}" + - name: "vcmla{neon_type[0].rot270_laneq}" doc: 
Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" @@ -5023,9 +6708,9 @@ intrinsics: - c - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] - - name: "vcmla{type[3]}" + - name: "vcmla{neon_type[0].rot270_laneq}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" @@ -5033,22 +6718,23 @@ intrinsics: - FnCall: [target_feature, ['enable = "neon,fcma"']] - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - *neon-unstable-fcma + - *neon-fp16 + - *neon-unstable-f16 static_defs: ["const LANE: i32"] safety: unsafe: [neon] types: - - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f32'] - - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f32'] + - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] compose: - - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] - Let: - c - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] - - name: "vcmla{neon_type[0].rot270_laneq}" + - name: "vcmla{neon_type[0].lane_nox}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" @@ -5061,15 +6747,16 @@ 
intrinsics: safety: unsafe: [neon] types: - - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] compose: - - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - FnCall: [static_assert!, ['LANE == 0']] - Let: - c - "{neon_type[0]}" - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] - - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + - name: "vcmla{neon_type[0].lane_nox}" doc: Floating-point complex multiply accumulate @@ -5079,15 +6766,16 @@ intrinsics: - FnCall: [target_feature, ['enable = "neon,fcma"']] - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - *neon-unstable-fcma + - *neon-fp16 + - *neon-unstable-f16 static_defs: ["const LANE: i32"] safety: unsafe: [neon] types: - - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] compose: - - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - Let: - c - "{neon_type[0]}" @@ -5114,6 +6802,28 @@ intrinsics: - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot270_lane}" + doc: Floating-point complex multiply accumulate + 
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + - name: "vdot{neon_type[0].laneq_nox}" doc: Dot product arithmetic (indexed) arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] @@ -5194,6 +6904,28 @@ intrinsics: - link: "llvm.aarch64.neon.fmax.{neon_type}" arch: aarch64,arm64ec + + - name: "vmaxh_{type}" + doc: Maximum (vector) + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmax] + safety: + unsafe: [neon] + types: + - f16 + compose: + - LLVMLink: + name: "vmaxh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmax.{type}" + arch: aarch64,arm64ec + + + - name: "vmaxnm{neon_type.no}" doc: Floating-point Maximum Number (vector) arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5212,6 +6944,47 @@ intrinsics: - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" arch: aarch64,arm64ec + + - name: "vmaxnmh_{type}" + doc: Floating-point Maximum Number + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnm] + safety: + unsafe: [neon] + types: + - f16 + compose: 
+ - LLVMLink: + name: "vmaxh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnm.{type}" + arch: aarch64,arm64ec + + + - name: "vminnmh_{type}" + doc: Floating-point Minimum Number + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnm] + safety: + unsafe: [neon] + types: + - f16 + compose: + - LLVMLink: + name: "vminh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminnm.{type}" + arch: aarch64,arm64ec + + - name: "vmaxnmv{neon_type[0].no}" doc: Floating-point maximum number across vector arguments: ["a: {neon_type[0]}"] @@ -5239,12 +7012,95 @@ intrinsics: safety: unsafe: [neon] types: - - [float32x4_t, f32] + - [float32x4_t, f32] + compose: + - LLVMLink: + name: "fmaxnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vmaxnmv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnmv] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fmaxnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vminnmv{neon_type[0].no}" + doc: Floating-point minimum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnmv] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fminnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + + - name: "vmaxv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - 
*neon-unstable-f16 + assert_instr: [fmaxv] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - LLVMLink: + name: "fmaxv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vminv{neon_type[0].no}" + doc: Floating-point minimum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminv] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] compose: - LLVMLink: - name: "fmaxnmv.{neon_type[0]}" + name: "fminv.{neon_type[0]}" links: - - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + - link: "llvm.aarch64.neon.fminv.{type[1]}.{neon_type[0]}" arch: aarch64,arm64ec - name: "vpmax{type[0]}" @@ -5283,6 +7139,27 @@ intrinsics: - link: "llvm.aarch64.neon.fmin.{neon_type}" arch: aarch64,arm64ec + + - name: "vminh_{type}" + doc: Minimum (vector) + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmin] + safety: + unsafe: [neon] + types: + - f16 + compose: + - LLVMLink: + name: "vminh.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmin.{type}" + arch: aarch64,arm64ec + + - name: "vminnm{neon_type.no}" doc: "Floating-point Minimum Number (vector)" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5394,6 +7271,111 @@ intrinsics: - link: "llvm.aarch64.neon.faddp.{neon_type}" arch: aarch64,arm64ec + + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [faddp] + safety: + unsafe: [neon] + types: + - float16x8_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpmax{neon_type.no}" + doc: Floating-point add 
pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpmaxnm{neon_type.no}" + doc: Floating-point maximum number pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmaxnmp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxnmp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnmp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpmin{neon_type.no}" + doc: Floating-point minimum pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminp.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vpminnm{neon_type.no}" + doc: Floating-point minimum number pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fminnmp] + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminnmp.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminnmp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpadd{type[0]}" doc: "Floating-point add pairwise" arguments: ["a: {neon_type[1]}"] @@ -6874,6 +8856,41 @@ intrinsics: compose: - FnCall: [simd_fsqrt, [a]] + - name: "vsqrt{neon_type.no}" + doc: "Calculates the square root of each lane."
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_fsqrt, [a]] + + - name: "vsqrt{type[1]}{type[0]}" + doc: "Floating-point square root" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fsqrt] + safety: + unsafe: [neon] + types: + - [f16, 'h_'] + compose: + - LLVMLink: + name: "llvm.sqrt.{type[0]}" + links: + - link: "llvm.sqrt.{type[0]}" + arch: aarch64,arm64ec + - name: "vrsqrts{type[0]}" doc: "Floating-point reciprocal square root step" arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] @@ -6912,6 +8929,27 @@ intrinsics: - link: "llvm.aarch64.neon.frsqrts.{type[1]}" arch: aarch64,arm64ec + + - name: "vrsqrts{type[0]}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrsqrts{type[0]}" + links: + - link: "llvm.aarch64.neon.frsqrts.{type[1]}" + arch: aarch64,arm64ec + + - name: "vrecpe{type[0]}" doc: "Reciprocal estimate." arguments: ["a: {type[1]}"] @@ -6950,6 +8988,27 @@ intrinsics: - link: "llvm.aarch64.neon.frecpe.{type[1]}" arch: aarch64,arm64ec + + - name: "vrecpe{type[0]}" + doc: "Reciprocal estimate."
+ arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecpe{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpe.{type[1]}" + arch: aarch64,arm64ec + + - name: "vrecps{type[0]}" doc: "Floating-point reciprocal step" arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] @@ -6988,6 +9047,27 @@ intrinsics: - link: "llvm.aarch64.neon.frecps.{type[1]}" arch: aarch64,arm64ec + + - name: "vrecps{type[0]}" + doc: "Floating-point reciprocal step" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecps{type[0]}" + links: + - link: "llvm.aarch64.neon.frecps.{type[1]}" + arch: aarch64,arm64ec + + - name: "vrecpx{type[0]}" doc: "Floating-point reciprocal exponent" arguments: ["a: {type[1]}"] @@ -7007,6 +9087,27 @@ intrinsics: - link: "llvm.aarch64.neon.frecpx.{type[1]}" arch: aarch64,arm64ec + + - name: "vrecpx{type[0]}" + doc: "Floating-point reciprocal exponent" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [h_f16, "f16"] + compose: + - LLVMLink: + name: "vrecpxs{type[0]}" + links: + - link: "llvm.aarch64.neon.frecpx.{type[1]}" + arch: aarch64,arm64ec + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation arguments: ["a: {type[0]}"] @@ -7081,6 +9182,27 @@ intrinsics: compose: - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + 
return_type: "{type[1]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [float64x1_t, float16x4_t] + - [float16x4_t, float64x1_t] + # q + - [float64x2_t, float16x8_t] + - [float16x8_t, float64x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vrshld_s64" doc: "Signed rounding shift left" arguments: ["a: {type}", "b: {type}"] @@ -7973,6 +10095,23 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn1{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[0, 4, 2, 6]'] + - [float16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vtrn1{neon_type[0].no}" doc: Transpose vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8021,6 +10160,22 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vtrn2{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[1, 5, 3, 7]'] + - [float16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vtrn2{neon_type[0].no}" doc: Transpose vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8076,6 +10231,22 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vzip2{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: 
{neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[2, 6, 3, 7]'] + - [float16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vzip1{neon_type[0].no}" doc: Zip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8111,6 +10282,23 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vzip1{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[0, 4, 1, 5]'] + - [float16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vuzp1{neon_type[0].no}" doc: Unzip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8159,6 +10347,22 @@ intrinsics: compose: - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vuzp1{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[0, 2, 4, 6]'] + - [float16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + - name: "vuzp2{neon_type[0].no}" doc: Unzip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8211,6 +10415,26 @@ intrinsics: - b - "{type[1]}" + - name: "vuzp2{neon_type[0].no}" + 
doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]] + safety: + unsafe: [neon] + types: + - [float16x4_t, '[1, 3, 5, 7]'] + - [float16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + compose: + - FnCall: + - "simd_shuffle!" + - - a + - b + - "{type[1]}" + - name: "vabal_high_{neon_type[1]}" doc: "Unsigned Absolute difference and Accumulate Long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] @@ -8485,6 +10709,76 @@ intrinsics: - b - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfma{type[3]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '2', '_lane_f16'] + - [float16x4_t, float16x8_t, '3', '_laneq_f16'] + - [float16x8_t, float16x4_t, '2', 'q_lane_f16'] + - [float16x8_t, float16x8_t, '3', 'q_laneq_f16'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + + # vfms lane f16 + - name: "vfms{type[3]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - 
*neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '2', '_lane_f16'] + - [float16x4_t, float16x8_t, '3', '_laneq_f16'] + - [float16x8_t, float16x4_t, '2', 'q_lane_f16'] + - [float16x8_t, float16x8_t, '3', 'q_laneq_f16'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + + - name: "vfms{type[1]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "h_f16"] + compose: + - FnCall: ["vfma{type[1]}", [a, -b, c]] + + - name: "vfma_lane_f64" doc: "Floating-point fused multiply-add to accumulator" arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -8598,6 +10892,68 @@ intrinsics: - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}] - FnCall: ["_vfmad_lane_f64", [b, c, a]] + + - name: "vfma{type[1]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "h_f16"] + compose: + - LLVMLink: + name: "_vfma_{type[1]}" + links: + - link: "llvm.fma.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vfma{type[1]}", [b, c, a]] + + + - name: "vfmah_lane{type[2]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f16", float16x4_t, '_f16', '2'] + - ["f16", float16x8_t, 'q_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}] + - FnCall: ["vfmah_{type[0]}", [a, b, c]] + + - name: "vfmsh_lane{type[2]}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f16", float16x4_t, '_f16', '2'] + - ["f16", float16x8_t, 'q_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}] + - FnCall: ["vfmsh_{type[0]}", [a, b, c]] + - name: "vfms_f64" doc: "Floating-point fused multiply-subtract from accumulator" arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -8734,6 +11090,7 @@ intrinsics: compose: - FnCall: ["vfma{type[2]}::", ['a', '-b', 'c']] + - name: "vceqz{neon_type[0].no}" doc: "Floating-point compare bitwise equal to zero" arguments: ["a: {neon_type[0]}"] @@ -8752,6 +11109,23 @@ intrinsics: - Let: [b, '{type[2]}', '{type[3]}'] - FnCall: [simd_eq, [a, {FnCall: [transmute, [b]]}]] + - name: "vceqz{neon_type[0].no}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, 'f16x4', 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, 
uint16x8_t, 'f16x8', 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, '{type[2]}', '{type[3]}'] + - FnCall: [simd_eq, [a, {FnCall: [transmute, [b]]}]] + - name: "vceqz{type[2]}" doc: "Floating-point compare bitwise equal to zero" arguments: ["a: {type[0]}"] @@ -8772,6 +11146,26 @@ intrinsics: - - FnCall: ["vdup_n_{type[0]}", [a]] - '0' + - name: "vceqz{type[2]}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", "h_f16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vceqz_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - '0' + - name: "vceqzd_{type[2]}" doc: "Compare bitwise equal to zero" arguments: ["a: {type[0]}"] @@ -8881,6 +11275,28 @@ intrinsics: - FnCall: ["vdup_n_{type[1]}", [b]] - '0' + + - name: "vcge{type[0]}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vcge_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + - name: "vcge{neon_type[0].no}" doc: "Floating-point compare greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -9009,6 +11425,27 @@ intrinsics: - - FnCall: ["vdup_n_{type[1]}", [a]] - '0' + + - name: "vcgez{type[0]}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcgez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + - name: "vclezd_s64" doc: "Compare less than or equal to zero" arguments: ["a: {type[0]}"] @@ -9126,6 +11563,26 @@ intrinsics: - - FnCall: ["vdup_n_{type[1]}", [a]] - '0' + - name: "vcgtz{type[0]}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16", "u16"] + compose: + - FnCall: + - "simd_extract!" 
+ - - FnCall: + - "vcgtz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" arguments: ["a: {neon_type[0]}"] @@ -9265,6 +11722,68 @@ intrinsics: - - a - FnCall: [simd_shuffle!, [b, b, '[LANE as u32, LANE as u32]']] + + # vmulq_laneq_f16 + - name: "vmul{type[2]}{neon_type[1].no}" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, '_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32]"] + - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]"] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '3']] + - FnCall: + - simd_mul + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + + + - name: "vmul{type[1]}_{type[0]}" + doc: Add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [f16, 'h'] + compose: + - 'a * b' + + + - name: "vmul{type[2]}" + doc: "Floating-point multiply" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f32", float32x2_t, "s_lane_f32", '1'] + - ["f32", float32x4_t, "s_laneq_f32", '2'] + - ["f64", float64x2_t, "d_laneq_f64", '1'] + compose: + 
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Identifier: ['a * b', Symbol] + + - name: "vmul{type[2]}" doc: "Floating-point multiply" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] @@ -9272,19 +11791,20 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-fp16 + - *neon-unstable-f16 static_defs: ['const LANE: i32'] safety: unsafe: [neon] types: - - ["f32", float32x2_t, "s_lane_f32", '1'] - - ["f32", float32x4_t, "s_laneq_f32", '2'] - - ["f64", float64x2_t, "d_laneq_f64", '1'] + - ["f16", float16x4_t, "h_lane_f16", '2'] + - ["f16", float16x8_t, "h_laneq_f16", '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] - Identifier: ['a * b', Symbol] + - name: "vrsrad_n_s64" doc: "Signed rounding shift right and accumulate." arguments: ["a: {type}", "b: {type}"] @@ -9417,6 +11937,28 @@ intrinsics: - FnCall: ["vdup_n_{type[0]}", [b]] - '0' + + - name: "vclt{type[2]}" + doc: "Floating-point compare less than" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h_f16'] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vclt_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - FnCall: ["vdup_n_{type[0]}", [b]] + - '0' + - name: "vabdl_high_{neon_type[0]}" doc: "Unsigned Absolute difference Long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -9471,6 +12013,30 @@ intrinsics: - b - FnCall: ["vdup{neon_type[1].N}", [c]] + + - name: "vfms{neon_type[0].N}" + doc: Floating-point fused Multiply-Subtract from accumulator. 
+ arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [fmls] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: + - "vdup{neon_type[0].N}" + - - c + + - name: "vpminnm{type[0]}" doc: "Floating-point minimum number pairwise" arguments: ["a: {neon_type[1]}"] @@ -9711,6 +12277,28 @@ intrinsics: - link: "llvm.aarch64.neon.frsqrte.{type[1]}" arch: aarch64,arm64ec + + - name: "vrsqrte{type[0]}" + doc: "Reciprocal square-root estimate." + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-fp16 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["h_f16", "f16"] + compose: + - LLVMLink: + name: "vrsqrte{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.frsqrte.{type[1]}" + arch: aarch64,arm64ec + + - name: "vpminnm{neon_type.no}" doc: "Floating-point Minimum Number Pairwise (vector)." 
arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -9777,6 +12365,28 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtau.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvta{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcvta{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to signed fixed-point, rounding toward zero" arguments: ["a: {neon_type[0]}"] @@ -9796,6 +12406,79 @@ intrinsics: - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i32", 'h'] + - ["f16", "i64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "i16", 'h', 'i32'] + compose: + - 
'vcvtmh_{type[3]}_f16(a) as i16' + + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to unsigned integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u32", 'h'] + - ["f16", "u64", 'h'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" + doc: "Floating-point convert to integer, rounding towards minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["f16", "u16", 'h', 'u32'] + compose: + - 'vcvtmh_{type[3]}_f16(a) as u16' + - name: "vmlal_high_n_{neon_type[1]}" doc: "Multiply-add long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] @@ -10860,6 +13543,27 @@ intrinsics: - cast - [] + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "{type[2]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*const f16', float16x4_t, "neon,fp16"] + - ['*const f16', float16x8_t, "neon,fp16"] + compose: + - FnCall: + - 'crate::ptr::read_unaligned' + - - MethodCall: + - ptr + - cast + - [] + - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures from one, two, three, or four registers." 
arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] @@ -10906,6 +13610,28 @@ intrinsics: - [] - a + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures from one, two, three, or four registers." + arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] + attr: + - FnCall: [target_feature, ['enable = "{type[2]}"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]] + - FnCall: [allow, ['clippy::cast_ptr_alignment']] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*mut f16', float16x4_t, "neon,fp16"] + - ['*mut f16', float16x8_t, "neon,fp16"] + compose: + - FnCall: + - 'crate::ptr::write_unaligned' + - - MethodCall: + - ptr + - cast + - [] + - a + - name: "__crc32d" doc: "CRC32 single round checksum for quad words (64 bits)." arguments: ["crc: {type[0]}", "data: {type[1]}"] @@ -11650,3 +14376,196 @@ intrinsics: - FnCall: - transmute - - b + + - name: "vfmlal{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlal2] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, '_high_'] + - [float32x4_t, float16x8_t, 'q_high_'] + compose: + - LLVMLink: + name: "vfmlal{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlal2.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + + - name: "vfmlal{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlal2, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlal{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlal{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlal] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, '_low_'] + - [float32x4_t, float16x8_t, 'q_low_'] + compose: + - LLVMLink: + name: "vfmlal{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlal.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + + - name: "vfmlal{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Add Long to accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlal, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlal{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlsl{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlsl2] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, '_high_'] + - [float32x4_t, float16x8_t, 'q_high_'] + compose: + - LLVMLink: + name: "vfmlsl{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlsl2.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vfmlsl{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlsl2, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlsl{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + + - name: "vfmlsl{type[2]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)." + arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-fp16 + - *enable-fhm + - *neon-unstable-f16 + assert_instr: [fmlsl] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, '_low_'] + - [float32x4_t, float16x8_t, 'q_low_'] + compose: + - LLVMLink: + name: "vfmlsl{type[2]}.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fmlsl.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vfmlsl{type[3]}{neon_type[1]}" + doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)." 
+ arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmlsl, 'LANE = 0']]}]] + - *neon-fp16 + - *enable-fhm + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: + - "vfmlsl{type[4]}{neon_type[1]}" + - - r + - a + - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 5d20bfc90c..2668efdb24 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -45,7 +45,7 @@ neon-stable-not-arm: &neon-stable-not-arm #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] neon-unstable-is-arm: &neon-unstable-is-arm - FnCall: [ cfg_attr, ['target_arch = "arm"', *neon-unstable]] + FnCall: [ cfg_attr, ['target_arch = "arm"', *neon-unstable]] # #[cfg_attr(all(test, not(target_env = "msvc"))] msvc-disabled: &msvc-disabled @@ -63,6 +63,13 @@ neon-aes: &neon-aes neon-i8mm: &neon-i8mm FnCall: [target_feature, ['enable = "neon,i8mm"']] +# #[target_feature(enable = "neon,fp16")] +neon-fp16: &neon-fp16 + FnCall: [target_feature, ['enable = "neon,fp16"']] + +enable-fcma: &enable-fcma + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fcma"']] }]] + #[cfg_attr(not(target_arch = "arm"), 
unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] neon-unstable-i8mm: &neon-unstable-i8mm FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']] }, { FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] } ]] @@ -77,6 +84,10 @@ arm-crc-unstable: &arm-crc-unstable aarch64-crc-stable: &aarch64-crc-stable FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "stdarch_aarch64_crc32"', 'since = "1.80.0"']]}]] +# #[unstable(feature = "stdarch_neon_f16", issue = "136306")] +neon-unstable-f16: &neon-unstable-f16 + FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] + intrinsics: - name: "vand{neon_type.no}" doc: Vector bitwise and @@ -263,6 +274,30 @@ intrinsics: - link: "llvm.aarch64.neon.fabd.{neon_type}" arch: aarch64,arm64ec + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments of Floating + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fabd.{neon_type}" + links: + - link: "llvm.arm.neon.vabds.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fabd.{neon_type}" + arch: aarch64,arm64ec + - name: "vabdl{neon_type[0].noq}" doc: Signed Absolute difference Long arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -379,6 +414,25 @@ intrinsics: compose: - FnCall: [simd_eq, [a, b]] + + - name: "vceq{neon_type[0].no}" + doc: "Floating-point compare equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vceq.f16"']]}]] + - FnCall: [cfg_attr, 
[{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmeq]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_eq, [a, b]] + - name: "vtst{neon_type[0].no}" doc: "Signed compare bitwise Test bits nonzero" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -425,6 +479,46 @@ intrinsics: compose: - FnCall: [simd_fabs, [a]] + - name: "vabs{neon_type.no}" + doc: "Floating-point absolute value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fabs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: [simd_fabs, [a]] + + - name: "vabs{type[0]}" + doc: "Floating-point absolute value" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fabs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['h_f16', 'f16'] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vabs_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - 0 + - name: "vcgt{neon_type[0].no}" doc: "Compare signed greater than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -487,6 +581,45 @@ intrinsics: compose: - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type[0].no}" + doc: "Floating-point compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_gt, [a, b]] + + + - name: "vcgtz{neon_type[0].no}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: [simd_gt, [a, {FnCall: [transmute, [b]]}]] + - name: "vclt{neon_type[0].no}" doc: "Compare signed less than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -549,6 +682,47 @@ intrinsics: compose: - FnCall: [simd_le, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Floating-point compare less than or equal" + arguments: ["a: 
{neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vclez{neon_type[0].no}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcle.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmle]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_le + - - a + - FnCall: [transmute, [b]] + - name: "vcge{neon_type[0].no}" doc: "Compare signed greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -721,6 +895,31 @@ intrinsics: - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcagt{neon_type[0].no}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 
'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcagt{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacgt.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - name: "vcage{neon_type[0].no}" doc: "Floating-point absolute compare greater than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -745,6 +944,30 @@ intrinsics: - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + - name: "vcage{neon_type[0].no}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - LLVMLink: + name: "vcage{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacge.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - name: "vcalt{neon_type[0].no}" doc: "Floating-point absolute compare less than" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -763,6 +986,24 @@ intrinsics: compose: - FnCall: ["vcagt{neon_type[0].no}", [b, a]] + - name: "vcalt{neon_type[0].no}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, 
[{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: ["vcagt{neon_type[0].no}", [b, a]] + - name: "vcale{neon_type[0].no}" doc: "Floating-point absolute compare less than or equal" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -781,6 +1022,25 @@ intrinsics: compose: - FnCall: ["vcage{neon_type[0].no}", [b, a]] + + - name: "vcale{neon_type[0].no}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: ["vcage{neon_type[0].no}", [b, a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -799,6 +1059,24 @@ intrinsics: compose: - FnCall: [simd_cast, [a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [scvtf]]}]] + - 
*neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [int16x4_t, float16x4_t] + - [int16x8_t, float16x8_t] + compose: + - FnCall: [simd_cast, [a]] + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] @@ -817,33 +1095,23 @@ intrinsics: compose: - FnCall: [simd_cast, [a]] - - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg, ['target_arch = "arm"']] - - FnCall: [target_feature, ['enable = "neon,v7"']] - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] - - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] - static_defs: ['const N: i32'] + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ucvtf]]}]] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - [uint32x2_t, float32x2_t] - - [uint32x4_t, float32x4_t] + - [uint16x4_t, float16x4_t] + - [uint16x8_t, float16x8_t] compose: - - FnCall: [static_assert!, ['N >= 1 && N <= 32']] - - LLVMLink: - name: "vcvt{neon_type[1].N}_{neon_type[0]}" - arguments: - - "a: {neon_type[0]}" - - "n: i32" - links: - - link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" - arch: arm - - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] + - FnCall: [simd_cast, [a]] - name: "vcvt{neon_type[1].N}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" @@ -859,8 +1127,8 @@ intrinsics: safety: unsafe: [neon] types: - - [int32x2_t, float32x2_t] - - [int32x4_t, float32x4_t] + - [uint32x2_t, float32x2_t] + - 
[uint32x4_t, float32x4_t] compose: - FnCall: [static_assert!, ['N >= 1 && N <= 32']] - LLVMLink: @@ -869,9 +1137,9 @@ intrinsics: - "a: {neon_type[0]}" - "n: i32" links: - - link: "llvm.arm.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + - link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" arch: arm - - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] - name: "vcvt{neon_type[1].N}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" @@ -879,15 +1147,15 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] static_defs: ['const N: i32'] safety: unsafe: [neon] types: - - [int32x2_t, float32x2_t] - - [int32x4_t, float32x4_t] + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] compose: - FnCall: [static_assert!, ['N >= 1 && N <= 32']] - LLVMLink: @@ -896,53 +1164,208 @@ intrinsics: - "a: {neon_type[0]}" - "n: i32" links: - - link: "llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec - - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] - name: "vcvt{neon_type[1].N}_{neon_type[0]}" doc: "Fixed-point convert to floating-point" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ucvtf, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-fp16 + - *neon-unstable-f16 static_defs: ['const N: i32'] safety: unsafe: [neon] types: - - [uint32x2_t, float32x2_t] - - [uint32x4_t, float32x4_t] + - [uint16x4_t, float16x4_t] + - [uint16x8_t, float16x8_t] compose: - - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] - LLVMLink: name: "vcvt{neon_type[1].N}_{neon_type[0]}" arguments: - "a: {neon_type[0]}" - "n: i32" links: + - link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] - - name: "vcvt{type[2]}" - doc: "Floating-point convert to fixed-point, rounding toward zero" + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - *target-is-arm - - FnCall: [target_feature, ['enable = "neon,v7"']] - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzs, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + - *neon-fp16 + - *neon-unstable-f16 static_defs: ['const N: i32'] safety: unsafe: [neon] types: - - [float32x2_t, int32x2_t, _n_s32_f32] - - [float32x4_t, int32x4_t, q_n_s32_f32] + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + 
arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzu, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - 
LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcvt"', 'N = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [scvtf, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x4_t, float16x4_t] + - [int16x8_t, float16x8_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 16']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + 
- link: "llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t, _n_s32_f32] + - [float32x4_t, int32x4_t, q_n_s32_f32] compose: - FnCall: [static_assert!, ['N >= 1 && N <= 32']] - LLVMLink: @@ -1131,6 +1554,68 @@ intrinsics: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"', 'N = 4']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[3]}{neon_type[0]}" + doc: "Create a new vector with all 
lanes set to a value" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, f16, 'float16x4', '_n_'] + - [float16x8_t, f16, 'float16x8', 'q_n_'] + compose: + - "{type[2]}_t::splat(a)" + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type[1]}"] @@ -1339,6 +1824,47 @@ intrinsics: - Identifier: ["{type[1]}", Symbol] - Identifier: ["{type[2]}", Symbol] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, 
{FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 3']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] + compose: + - Identifier: ["{type[1]}", Symbol] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 7']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float16x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] + compose: + - Identifier: ["{type[1]}", Symbol] + + + - name: "vext{neon_type[0].no}" doc: "Extract vector from pair of vectors" arguments: ["a: 
{neon_type[0]}", "b: {neon_type[0]}"] @@ -1789,6 +2315,24 @@ intrinsics: compose: - FnCall: [simd_neg, [a]] + - name: "vneg{neon_type[0].no}" + doc: Negate + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vneg.{type[1]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fneg]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, 'f16'] + - [float16x8_t, 'f16'] + compose: + - FnCall: [simd_neg, [a]] + - name: "vqneg{neon_type[0].no}" doc: Signed saturating negate arguments: ["a: {neon_type[0]}"] @@ -2089,6 +2633,30 @@ intrinsics: - link: "llvm.arm.neon.vrintn.{neon_type}" arch: arm + - name: "vrndn{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrintn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frintn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "llvm.frinn.{neon_type}" + links: + - link: "llvm.aarch64.neon.frintn.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vrintn.{neon_type}" + arch: arm + - name: "vqadd{neon_type.no}" doc: Saturating add arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -2333,15 +2901,85 @@ intrinsics: - transmute - - a - - name: "vld2{neon_type[1].nox}" - doc: Load multiple 2-element structures to two registers + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" arguments: ["a: {type[0]}"] return_type: "{neon_type[1]}" attr: - - *enable-v7 - - *target-is-arm - - 
*neon-unstable - assert_instr: [vld2] + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t] + - ["*const f16", float16x8x2_t] + - ["*const f16", float16x4x3_t] + - ["*const f16", float16x8x3_t] + - ["*const f16", float16x4x4_t] + - ["*const f16", float16x8x4_t] + compose: + - LLVMLink: + name: "vld1x{neon_type[1].tuple}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f{neon_type[1].base}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f{neon_type[1].base}" + arch: arm + + - name: "vld1{type[2]}_{neon_type[1]}" + doc: "Load one single-element structure to one lane of one register" + arguments: ["ptr: {type[0]}", "src: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4_t, '_lane', '2'] + - ["*const f16", float16x8_t, 'q_lane', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: [simd_insert!, [src, "LANE as u32", "*ptr"]] + + - name: "vld1{type[2]}_{neon_type[1]}" + doc: "Load one single-element structure and replicate to all lanes of one register" + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vld1"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: 
[assert_instr, [ld1r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4_t, '_dup', 'f16x4', "[0, 0, 0, 0]"] + - ["*const f16", float16x8_t, 'q_dup', 'f16x8', "[0, 0, 0, 0, 0, 0, 0, 0]"] + compose: + - Let: [x, "{neon_type[1]}", "vld1{neon_type[1].lane_nox}::<0>(ptr, transmute({type[3]}::splat(0.)))"] + - FnCall: [simd_shuffle!, [x, x, "{type[4]}"]] + + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-unstable + assert_instr: [vld2] safety: unsafe: [neon] types: @@ -2905,6 +3543,399 @@ intrinsics: - "_vld2{neon_type[1].dup_nox}" - - "a as _" + - name: "vld2{neon_type[1].nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld2{neon_type[1].nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: 
{type[0]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - *neon-fp16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16] + - ["*const f16", float16x8x2_t, f16] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld2', 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - 
*neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x2_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "LANE" + - "2" + + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x2_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x2_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "LANE as i64" + - "a as _" + + + - name: "vld3{neon_type[1].nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld3{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld3{neon_type[1].nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].nox}" + - - "a as _" + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm 
+ - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld3{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16] + - ["*const f16", float16x8x3_t, f16] + compose: + - LLVMLink: + name: "vld3dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld3', 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x3_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld3{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "b.2" + - "LANE" + - "2" + + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x3_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x3_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld3.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld3{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "b.2" + - "LANE as i64" + - "a as _" + - name: "vld3{neon_type[1].lane_nox}" doc: "Load multiple 3-element structures to two registers" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] @@ -3841,6 +4872,31 @@ intrinsics: - FnCall: [simd_extract!, [b, 'LANE as u32']] - Identifier: [';', Symbol] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + types: + - ['*mut f16', float16x4_t, '2'] + - ['*mut f16', float16x8_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: 'vst1{neon_type[1].no}' doc: "Store multiple single-element structures from one, two, three, or four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4057,6 +5113,34 @@ intrinsics: arch: arm - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - 
*neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst1] + types: + - [f16, float16x4x4_t, float16x4_t] + - [f16, float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x4.p0{type[0]}.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4149,19 +5233,45 @@ intrinsics: arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] attr: - *target-not-arm - - *neon-stable + - *neon-stable + assert_instr: [st2] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, int8x8_t] + - [i16, int16x4x2_t, int16x4_t] + - [i32, int32x2x2_t, int32x2_t] + - [i8, int8x16x2_t, int8x16_t] + - [i16, int16x8x2_t, int16x8_t] + - [i32, int32x4x2_t, int32x4_t] + - [f32, float32x2x2_t, float32x2_t] + - [f32, float32x4x2_t, float32x4_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 assert_instr: [st2] safety: unsafe: [neon] types: - - [i8, int8x8x2_t, int8x8_t] - - [i16, int16x4x2_t, int16x4_t] - - [i32, int32x2x2_t, int32x2_t] - - [i8, int8x16x2_t, int8x16_t] - - [i16, int16x8x2_t, int16x8_t] - - [i32, int32x4x2_t, int32x4_t] - - [f32, float32x2x2_t, float32x2_t] - - [f32, float32x4x2_t, float32x4_t] + - [f16, float16x4x2_t, float16x4_t] + 
- [f16, float16x8x2_t, float16x8_t] compose: - LLVMLink: name: 'st2.{neon_type[1]}' @@ -4174,6 +5284,7 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4235,6 +5346,37 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, '2', float16x4_t] + - [f16, float16x8x2_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" doc: "Store multiple 2-element structures from two registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4296,6 +5438,35 @@ intrinsics: arch: arm - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', "{type[3]}"]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst2] + safety: + unsafe: [neon] + types: + - [f16, 
float16x4x2_t, float16x4_t, '2'] + - [f16, float16x8x2_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 'vst2.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', "{type[3]}"]] + + - name: "vst2{neon_type[1].lane_nox}" doc: "Store multiple 2-element structures from two registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4331,6 +5502,39 @@ intrinsics: arch: arm - FnCall: ['_vst2{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'LANE', "{type[4]}"]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x2_t, '2', float16x4_t, '2'] + - [f16, float16x8x2_t, '1', float16x8_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2lane.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'LANE', "{type[4]}"]] + + - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4510,6 +5714,36 @@ intrinsics: arch: arm - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + 
arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst3] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, float16x4_t, '2'] + - [f16, float16x8x3_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + + - name: "vst3{neon_type[1].lane_nox}" doc: "Store multiple 3-element structures from three registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4546,6 +5780,40 @@ intrinsics: arch: arm - FnCall: ['_vst3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', "{type[4]}"]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, '2', float16x4_t, '4'] + - [f16, float16x8x3_t, '3', float16x8_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3lane.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', "{type[4]}"]] + + - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" arguments: ["a: *mut 
{type[0]}", "b: {neon_type[1]}"] @@ -4575,6 +5843,34 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, float16x4_t] + - [f16, float16x8x3_t, float16x8_t] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" doc: "Store multiple 3-element structures from three registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4609,6 +5905,38 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x3_t, '2', float16x4_t] + - [f16, float16x8x3_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - 
FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4791,6 +6119,37 @@ intrinsics: arch: arm - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', "{type[3]}"]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [vst4] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, float16x4_t, '2'] + - [f16, float16x8x4_t, float16x8_t, '2'] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', "{type[3]}"]] + + - name: "vst4{neon_type[1].lane_nox}" doc: "Store multiple 4-element structures from four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4828,6 +6187,40 @@ intrinsics: arch: arm - FnCall: ['_vst4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', 'LANE', "{type[4]}"]] + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, '2', float16x4_t, '2'] + - [f16, float16x8x4_t, '3', float16x8_t, '2'] + compose: + - FnCall: 
[static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst4lane.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', 'LANE', "{type[4]}"]] + + - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4858,6 +6251,35 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, float16x4_t] + - [f16, float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" doc: "Store multiple 4-element structures from four registers" arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] @@ -4893,6 +6315,39 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, 
[test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f16, float16x4x4_t, '2', float16x4_t] + - [f16, float16x8x4_t, '3', float16x8_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vusdot{neon_type[0].no}" doc: "Dot product vector form with unsigned and signed integers" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] @@ -5024,6 +6479,26 @@ intrinsics: compose: - FnCall: [simd_mul, [a, b]] + + - name: "vmul{neon_type[1].no}" + doc: Multiply + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmul.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [f16, float16x4_t] + - [f16, float16x8_t] + compose: + - FnCall: [simd_mul, [a, b]] + + - name: "vmul{neon_type[0].lane_nox}" doc: Multiply arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] @@ -5054,6 +6529,32 @@ intrinsics: - - a - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]] + + - name: "vmul{neon_type[0].lane_nox}" + doc: Multiply + arguments: ["a: {neon_type[0]}", "v: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul, 'LANE = 
1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [float16x8_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - simd_mul + - - a + - FnCall: ["simd_shuffle!", [v, v, "{type[3]}"]] + + - name: "vmul{neon_type[0].laneq_nox}" doc: Multiply arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] @@ -5261,6 +6762,33 @@ intrinsics: arch: arm - FnCall: ["_vfma{neon_type.no}", [b, c, a]] + + - name: "vfma{neon_type.no}" + doc: Floating-point fused Multiply-Add to accumulator (vector) + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vfma]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmla]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fma.{neon_type}" + links: + - link: "llvm.fma.{neon_type}" + arch: aarch64 + - link: "llvm.fma.{neon_type}" + arch: arm + - FnCall: ["_vfma{neon_type.no}", [b, c, a]] + + - name: "vfma{neon_type[0].N}" doc: Floating-point fused Multiply-Add to accumulator(vector) arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] @@ -5368,6 +6896,64 @@ intrinsics: compose: - FnCall: [simd_sub, [a, b]] + + - name: "vsub{neon_type[1].no}" + doc: "Subtract" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: 
"{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsub.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fsub]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['f16', float16x4_t] + - ['f16', float16x8_t] + compose: + - FnCall: [simd_sub, [a, b]] + + + - name: "vadd{neon_type.no}" + doc: Floating-point Add (vector). + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vadd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - FnCall: + - simd_add + - - a + - b + + - name: "vadd{type[0]}" + doc: Add + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vadd.f16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['h_f16', 'f16'] + compose: + - 'a + b' + - name: "vadd{neon_type.no}" doc: Bitwise exclusive OR arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5894,6 +7480,32 @@ intrinsics: - link: "llvm.aarch64.neon.fmax.{neon_type}" arch: aarch64,arm64ec + + - name: "vmax{neon_type.no}" + doc: Maximum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmax]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vmax.{neon_type}" + links: + - link: 
"llvm.arm.neon.vmaxs.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmax.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmaxnm{neon_type.no}" doc: Floating-point Maximum Number (vector) arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5918,6 +7530,57 @@ intrinsics: - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" arch: aarch64,arm64ec + + - name: "vmaxnm{neon_type.no}" + doc: Floating-point Maximum Number (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmaxnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxnm]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fmaxnm.{neon_type}" + links: + - link: "llvm.arm.neon.vmaxnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vminnm{neon_type.no}" + doc: Floating-point Minimum Number (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vminnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminnm]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "fminnm.{neon_type}" + links: + - link: "llvm.arm.neon.vminnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fminnm.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" doc: "Minimum (vector)" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -5998,6 +7661,33 @@ intrinsics: - link: 
"llvm.aarch64.neon.fmin.{neon_type}" arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" + doc: Minimum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmin]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vmin.{neon_type}" + links: + - link: "llvm.arm.neon.vmins.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmin.{neon_type}" + arch: aarch64,arm64ec + + + - name: "vminnm{neon_type.no}" doc: "Floating-point Minimum Number (vector)" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -6035,7 +7725,30 @@ intrinsics: safety: unsafe: [neon] types: - - float32x2_t + - float32x2_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.arm.neon.vpadd.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [faddp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t compose: - LLVMLink: name: "faddp.{neon_type}" @@ -6045,6 +7758,7 @@ intrinsics: - link: "llvm.aarch64.neon.faddp.{neon_type}" arch: aarch64,arm64ec + - name: "vqdmull{neon_type[0].noq}" doc: "Signed saturating doubling multiply long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -6896,6 +8610,32 @@ intrinsics: - link: "llvm.aarch64.neon.frsqrts.{neon_type}" arch: aarch64,arm64ec + + - name: "vrsqrts{neon_type.no}" + doc: "Floating-point reciprocal square root step" + 
arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - *neon-fp16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsqrts]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frsqrts]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrsqrts{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrts.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrecpe{neon_type.no}" doc: "Reciprocal estimate." arguments: ["a: {neon_type}"] @@ -6920,6 +8660,32 @@ intrinsics: - link: "llvm.aarch64.neon.frecpe.{neon_type}" arch: aarch64,arm64ec + + - name: "vrecpe{neon_type.no}" + doc: "Reciprocal estimate." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrecpe]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frecpe]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrecpe{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecpe.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecpe.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrecps{neon_type.no}" doc: "Floating-point reciprocal step" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -6944,6 +8710,32 @@ intrinsics: - link: "llvm.aarch64.neon.frecps.{neon_type}" arch: aarch64,arm64ec + + - name: "vrecps{neon_type.no}" + doc: "Floating-point reciprocal step" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: 
[cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrecps]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frecps]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrecps{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecps.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecps.{neon_type}" + arch: aarch64,arm64ec + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation arguments: ["a: {type[0]}"] @@ -7253,6 +9045,111 @@ intrinsics: compose: - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + # non-q + - [float32x2_t, float16x4_t] + - [poly16x4_t, float16x4_t] + - [poly8x8_t, float16x4_t] + - [int8x8_t, float16x4_t] + - [int16x4_t, float16x4_t] + - [int32x2_t, float16x4_t] + - [int64x1_t, float16x4_t] + - [uint8x8_t, float16x4_t] + - [uint16x4_t, float16x4_t] + - [uint32x2_t, float16x4_t] + - [uint64x1_t, float16x4_t] + - [float16x4_t, float32x2_t] + - [float16x4_t, poly16x4_t] + - [float16x4_t, poly8x8_t] + - [float16x4_t, int8x8_t] + - [float16x4_t, int16x4_t] + - [float16x4_t, int32x2_t] + - [float16x4_t, int64x1_t] + - [float16x4_t, uint8x8_t] + - [float16x4_t, uint16x4_t] + - [float16x4_t, uint32x2_t] + - [float16x4_t, uint64x1_t] + # q + - [float32x4_t, float16x8_t] + - [poly16x8_t, float16x8_t] + - [poly8x16_t, float16x8_t] + - [int8x16_t, float16x8_t] + - [int16x8_t, float16x8_t] + - 
[int32x4_t, float16x8_t] + - [int64x2_t, float16x8_t] + - [uint8x16_t, float16x8_t] + - [uint16x8_t, float16x8_t] + - [uint32x4_t, float16x8_t] + - [uint64x2_t, float16x8_t] + - [float16x8_t, float32x4_t] + - [float16x8_t, poly16x8_t] + - [float16x8_t, poly8x16_t] + - [float16x8_t, int8x16_t] + - [float16x8_t, int16x8_t] + - [float16x8_t, int32x4_t] + - [float16x8_t, int64x2_t] + - [float16x8_t, uint8x16_t] + - [float16x8_t, uint16x8_t] + - [float16x8_t, uint32x4_t] + - [float16x8_t, uint64x2_t] + compose: + - FnCall: [transmute, [a]] + + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [poly64x1_t, float16x4_t] + - [float16x4_t, poly64x1_t] + # q + - [poly64x2_t, float16x8_t] + - [poly128_t, float16x8_t] + - [float16x8_t, poly128_t] + - [float16x8_t, poly64x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vrev64{neon_type[0].no}" + doc: Reverse elements in 64-bit doublewords + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrev64]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [rev64]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, "[3, 2, 1, 0]"] + - [float16x8_t, "[3, 2, 1, 0, 7, 6, 5, 4]"] + compose: + - FnCall: [simd_shuffle!, [a, a, "{type[1]}"]] + - name: "vrshl{neon_type.no}" doc: "Signed rounding shift left" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -7562,6 +9459,23 @@ intrinsics: compose: - FnCall: [transmute, [a]] + - name: "vcreate_{neon_type[1]}" + doc: "Insert vector element from another 
vector element" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["u64", float16x4_t] + compose: + - FnCall: [transmute, [a]] + - name: "vcreate_p64" doc: "Insert vector element from another vector element" arguments: ["a: {type[0]}"] @@ -7619,6 +9533,29 @@ intrinsics: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vset{neon_type[1].lane_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, LANE = 0]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f16", float16x4_t, '2'] + - ["f16", float16x8_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vset_lane_{neon_type[0]}" doc: "Insert vector element from another vector element" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] @@ -8043,6 +9980,36 @@ intrinsics: - transmute - - Identifier: ['(a1, b1)', Symbol] + + - name: "vtrn{neon_type[0].no}" + doc: "Transpose elements" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: 
[cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [trn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + compose: + - Let: + - a1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[2]}"]] + - Let: + - b1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - Identifier: ['(a1, b1)', Symbol] + + - name: "vtrn{neon_type[0].no}" doc: "Transpose elements" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8168,6 +10135,35 @@ intrinsics: - transmute - - '(a0, b0)' + + - name: "vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vzip.16"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + - name: "vuzp{neon_type[0].no}" doc: Unzip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8209,6 +10205,36 @@ intrinsics: - transmute - - '(a0, b0)' + + - name: "vuzp{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + 
return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [float16x8_t, float16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vuzp{neon_type[0].no}" doc: Unzip vectors arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -8426,23 +10452,79 @@ intrinsics: - "a: {neon_type[2]}" - "b: {neon_type[2]}" links: - - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.{neon_type[2]}.p0" - arch: arm - - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']] + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.{neon_type[2]}.p0" + arch: arm + - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']] + + + # vst1_f16_x2 - arm + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - *neon-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ['*mut f16', float16x4x2_t, float16x4_t] + - ['*mut f16', float16x8x2_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "ptr: {type[0]}" + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + links: + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0f16.{neon_type[2]}" + arch: arm + - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']] + + + # vst1_f16_x2 - aarch64 + - name: 
"vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x2_t, float16x4_t] + - ["*mut f16", float16x8x2_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x2.{neon_type[2]}.p0f16" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', a]] - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures to one, two, three, or four registers" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] attr: - FnCall: [cfg, ['target_arch = "arm"']] - - FnCall: [target_feature, ['enable = "neon,v7"']] + - *neon-v7 - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] - - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - ['*mut f32', float32x2x3_t, float32x2_t] - - ['*mut f32', float32x4x3_t, float32x4_t] + - ['*mut f16', float16x4x3_t, float16x4_t] + - ['*mut f16', float16x8x3_t, float16x8_t] compose: - LLVMLink: name: "vst1{neon_type[1].no}" @@ -8452,7 +10534,7 @@ intrinsics: - "b: {neon_type[2]}" - "c: {neon_type[2]}" links: - - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.{neon_type[2]}.p0" + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0f16.{neon_type[2]}" arch: arm - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1', 'b.2']] @@ -8505,6 +10587,34 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, 
two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x3_t, float16x4_t] + - ["*mut f16", float16x8x3_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x3.{neon_type[2]}.p0f16" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', a]] + + - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures to one, two, three, or four registers" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] @@ -8531,6 +10641,52 @@ intrinsics: arch: aarch64,arm64ec - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*mut f16", float16x4x4_t, float16x4_t] + - ["*mut f16", float16x8x4_t, float16x8_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "d: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x4.{neon_type[2]}.p0f16" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', a]] + + +# - name: "vst1{neon_type[1].no}" +# doc: "Store a single-element structures to one register." 
+# arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] +# attr: +# - *neon-v7 +# - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst1]]}]] +# - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st1]]}]] +# - *neon-fp16 +# - *neon-unstable-f16 +# safety: +# unsafe: [neon] +# types: +# - ["*mut f16", float16x4_t] +# - ["*mut f16", float16x8_t] +# compose: +# - FnCall: [core::ptr::write_unaligned, ['ptr.cast()', a]] + - name: "vfms{neon_type.no}" doc: "Floating-point fused multiply-subtract from accumulator" arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] @@ -8632,6 +10788,47 @@ intrinsics: compose: - FnCall: [simd_ge, [a, b]] + - name: "vcge{neon_type[0].no}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_ge, [a, b]] + + + - name: "vcgez{neon_type[0].no}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmge]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_ge + - - a + - FnCall: [transmute, [b]] + - name: "vclt{neon_type.no}" doc: "Compare unsigned less than" arguments: ["a: {neon_type}", "b: {neon_type}"] @@ -8857,6 +11054,61 @@ intrinsics: - link: "llvm.fptoui.sat.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtzu]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: + - simd_cast + - - a + + - name: "vcvt_f16_{neon_type[0]}" + doc: "Floating-point convert to lower precision narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt.f16.f32]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtn]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float32x4_t, float16x4_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt_f32_f16" + doc: "Floating-point convert to higher precision long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = 
"aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtl]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, float32x4_t] + compose: + - FnCall: [simd_cast, [a]] + - name: "vmla{neon_type[0].N}" doc: "Vector multiply accumulate with scalar" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] @@ -9215,6 +11467,29 @@ intrinsics: - - a - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, "f16"] + - [float16x8_t, "f16"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul{type[2]}" doc: "Floating-point multiply" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] @@ -9310,6 +11585,47 @@ intrinsics: compose: - FnCall: [simd_lt, [a, b]] + - name: "vclt{neon_type[0].no}" + doc: "Floating-point compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmgt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [simd_lt, [a, b]] + + + - name: "vcltz{neon_type[0].no}" + doc: "Floating-point compare less 
than" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vclt.f16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmlt]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, uint16x4_t, f16x4, 'f16x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float16x8_t, uint16x8_t, f16x8, 'f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_lt + - - a + - FnCall: [transmute, [b]] + - name: "vabdl_{neon_type[0]}" doc: "Unsigned Absolute difference Long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -9403,6 +11719,27 @@ intrinsics: - b - FnCall: ["vdup{neon_type[0].N}_vfp4", [c]] + + - name: "vfms{neon_type.no}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmls]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: ["vfma{neon_type.no}", [a, b, c]] + - name: "vqdmulh{neon_type[0].laneq_nox}" doc: "Vector saturating doubling multiply high by scalar" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] @@ -9503,6 +11840,32 @@ intrinsics: - link: "llvm.aarch64.neon.frsqrte.{neon_type}" arch: aarch64,arm64ec + + - name: 
"vrsqrte{neon_type.no}" + doc: "Reciprocal square-root estimate." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v8 + - *neon-fp16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsqrte]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frsqrte]]}]] + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - float16x4_t + - float16x8_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrte.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrte.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqshlu{neon_type[0].N}" doc: "Signed saturating shift left unsigned" arguments: ["a: {neon_type[0]}"] @@ -9594,6 +11957,27 @@ intrinsics: - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" arch: aarch64,arm64ec + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtzs]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, int16x4_t] + - [float16x8_t, int16x8_t] + compose: + - FnCall: + - simd_cast + - - a + - name: "vqmovn_{neon_type[0]}" doc: "Unsigned saturating extract narrow" arguments: ["a: {neon_type[0]}"] @@ -9794,28 +12178,53 @@ intrinsics: - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]] - name: "vld1{type[0]}" - visibility: private + visibility: private + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: 
["a: {type[1]}", "b: {type[2]}"] + return_type: "{neon_type[3]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + # - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + safety: + unsafe: [neon] + types: + - ["_v8i8", "*const i8", "i32", "int8x8_t"] + - ["q_v16i8", "*const i8", "i32", "int8x16_t"] + - ["_v4i16", "*const i8", "i32", "int16x4_t"] + - ["q_v8i16", "*const i8", "i32", "int16x8_t"] + - ["_v2i32", "*const i8", "i32", "int32x2_t"] + - ["q_v4i32", "*const i8", "i32", "int32x4_t"] + - ["_v1i64", "*const i8", "i32", "int64x1_t"] + - ["q_v2i64", "*const i8", "i32", "int64x2_t"] + - ["_v2f32", "*const i8", "i32", "float32x2_t"] + - ["q_v4f32", "*const i8", "i32", "float32x4_t"] + compose: + - LLVMLink: + name: "vld1.{type[0]}" + links: + - link: "llvm.arm.neon.vld1.{neon_type[3]}" + arch: arm + - FnCall: ["_vld1{type[0]}", [a, b]] + + + - name: "vld1{type[0]}" + visibility: private doc: "Load multiple single-element structures to one, two, three, or four registers" arguments: ["a: {type[1]}", "b: {type[2]}"] return_type: "{neon_type[3]}" attr: - FnCall: [cfg, ['target_arch = "arm"']] - FnCall: [target_feature, ['enable = "neon,v7"']] - # - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] - - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - *neon-fp16 + - *neon-unstable-f16 safety: unsafe: [neon] types: - - ["_v8i8", "*const i8", "i32", "int8x8_t"] - - ["q_v16i8", "*const i8", "i32", "int8x16_t"] - - ["_v4i16", "*const i8", "i32", "int16x4_t"] - - ["q_v8i16", "*const i8", "i32", "int16x8_t"] - - ["_v2i32", "*const i8", "i32", "int32x2_t"] - - ["q_v4i32", "*const i8", "i32", "int32x4_t"] - - ["_v1i64", "*const i8", "i32", "int64x1_t"] - - ["q_v2i64", "*const i8", "i32", 
"int64x2_t"] - - ["_v2f32", "*const i8", "i32", "float32x2_t"] - - ["q_v4f32", "*const i8", "i32", "float32x4_t"] + - ["_v4f16", "*const i8", "i32", "float16x4_t"] + - ["q_v8f16", "*const i8", "i32", "float16x8_t"] compose: - LLVMLink: name: "vld1.{type[0]}" @@ -9824,6 +12233,7 @@ intrinsics: arch: arm - FnCall: ["_vld1{type[0]}", [a, b]] + - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers." arguments: ["ptr: {type[0]}"] @@ -9885,6 +12295,29 @@ intrinsics: - - 'ptr as *const i8' - '{type[4]}' + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers." + arguments: ["ptr: {type[0]}"] + return_type: "{neon_type[1]}" + safety: + unsafe: [neon] + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "{type[3]}"']] + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] + types: + - ['*const f16', float16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v4f16'] + - ['*const f16', float16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v8f16'] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld1{type[5]}" + - - 'ptr as *const i8' + - '{type[4]}' + - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers." 
arguments: ["ptr: {type[0]}"] @@ -10158,8 +12591,210 @@ intrinsics: - FnCall: [transmute, ["b.3"]] - c + - name: "vld4{neon_type[1].nox}" + doc: Load single 4-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld4{neon_type[1].nox}" + - - "a as _" + - "2" + + - name: "vld4{neon_type[1].nox}" + doc: Load single 4-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].nox}" + - - "a as _" + + - name: "vld4{neon_type[1].dup_nox}" + doc: Load single 4-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: 
"vld4dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4dup.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: arm + - FnCall: + - "_vld4{neon_type[1].dup_nox}" + - - "a as _" + - "2" + + + - name: "vld4{neon_type[1].dup_nox}" + doc: Load single 4-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4r]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16] + - ["*const f16", float16x8x4_t, f16] + compose: + - LLVMLink: + name: "vld4dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4r.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].dup_nox}" + - - "a as _" + + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld4', 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x4_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "ptr: *const f16" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "d: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4lane.v{neon_type[1].lane}{type[2]}.p0" + arch: arm + - FnCall: + - "_vld4{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "b.2" + - "b.3" + - "LANE" + - "2" + + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-fp16 + - *neon-unstable-f16 + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const f16", float16x4x4_t, f16, float16x4_t, "2"] + - ["*const f16", float16x8x4_t, f16, float16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld4.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "c: {neon_type[3]}" + - "d: {neon_type[3]}" + - "n: i64" + - "ptr: *const f16" + links: + - link: "llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[2]}.p0" + arch: aarch64,arm64ec + - FnCall: + - "_vld4{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "b.2" + - "b.3" + - "LANE as i64" + - "a as _" + - name: "vcombine{neon_type[0].noq}" - doc: "Vector combine" + doc: Join two smaller vectors into a single larger vector arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: @@ -10230,7 +12865,7 @@ intrinsics: arch: aarch64,arm64ec - link: "llvm.arm.neon.aesd" arch: arm - + - name: "vaesmcq_u8" doc: "AES mix columns." 
arguments: ["data: {neon_type[0]}"] @@ -11544,7 +14179,7 @@ intrinsics: - FnCall: [transmute, ['a.2']] - FnCall: [transmute, ['a.3']] - FnCall: [transmute, [b]] - + - name: "vst1{type[0]}" visibility: private doc: "Store multiple single-element structures from one, two, three, or four registers." @@ -11574,6 +14209,28 @@ intrinsics: - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0" arch: arm + - name: "vst1{type[0]}" + visibility: private + doc: "Store multiple single-element structures from one, two, three, or four registers." + arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: {type[3]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]] + types: + - ['_v4f16', '* const i8', float16x4_t, i32, '16'] + - ['q_v8f16', '* const i8', float16x8_t, i32, '16'] + compose: + - LLVMLink: + name: "_vst1{type[0]}" + links: + - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0" + arch: arm + - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures from one, two, three, or four registers." arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] @@ -11616,6 +14273,29 @@ intrinsics: - '{type[3]}' - '{type[4]}' + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures from one, two, three, or four registers." 
+ arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]] + types: + - ['*mut f16', float16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v4f16'] + - ['*mut f16', float16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v8f16'] + compose: + - FnCall: + - "vst1{type[5]}" + - - 'ptr as *const i8' + - '{type[3]}' + - '{type[4]}' + + - name: "vshiftins{type[0]}" visibility: private doc: "Shift Right and Insert (immediate)" @@ -11780,3 +14460,75 @@ intrinsics: - - a - b - FnCall: ["{type[5]}", ["{type[6]}"]] + + - name: "vcombine{neon_type[0].no}" + doc: Join two smaller vectors into a single larger vector + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t] + compose: + - FnCall: [simd_shuffle!, [a, b, '[0, 1, 2, 3, 4, 5, 6, 7]']] + + - name: "vget_{type[2]}_{neon_type[0]}" + doc: Duplicate vector element to vector + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - *neon-fp16 + - *neon-unstable-f16 + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [float16x4_t, float16x8_t, 'low', "[0, 1, 2, 3]"] + - [float16x4_t, float16x8_t, 'high', "[4, 5, 6, 7]"] + compose: + - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + + - name: "vget{type[2]}" + doc: Duplicate vector element to scalar + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - *neon-fp16 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: 
[assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["1"]] + - *neon-unstable-f16 + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float16x4_t, f16, '_lane_f16', '2'] + - [float16x8_t, f16, 'q_lane_f16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: [simd_extract!, [a, "LANE as u32"]] + + - name: "vmov{neon_type[0].N}" + doc: "Duplicate element to vector" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup]]}]] + - *neon-fp16 + - *neon-unstable-f16 + safety: + unsafe: [neon] + types: + - [float16x4_t, f16] + - [float16x8_t, f16] + compose: + - FnCall: ["vdup{neon_type[0].N}", [a]] diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs index 5e4db96a70..a07c150575 100644 --- a/crates/stdarch-verify/src/lib.rs +++ b/crates/stdarch-verify/src/lib.rs @@ -290,6 +290,15 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { "uint64x2x2_t" => quote! { &U64X2X2 }, "uint64x2x3_t" => quote! { &U64X2X3 }, "uint64x2x4_t" => quote! { &U64X2X4 }, + "float16x2_t" => quote! { &F16X2 }, + "float16x4_t" => quote! { &F16X4 }, + "float16x4x2_t" => quote! { &F16X4X2 }, + "float16x4x3_t" => quote! { &F16X4X3 }, + "float16x4x4_t" => quote! { &F16X4X4 }, + "float16x8_t" => quote! { &F16X8 }, + "float16x8x2_t" => quote! { &F16X8X2 }, + "float16x8x3_t" => quote! { &F16X8X3 }, + "float16x8x4_t" => quote! { &F16X8X4 }, "float32x2_t" => quote! { &F32X2 }, "float32x2x2_t" => quote! { &F32X2X2 }, "float32x2x3_t" => quote! 
{ &F32X2X3 }, diff --git a/intrinsics_data/arm_intrinsics.json b/intrinsics_data/arm_intrinsics.json index a463564932..754c8f909a 100644 --- a/intrinsics_data/arm_intrinsics.json +++ b/intrinsics_data/arm_intrinsics.json @@ -1003,6 +1003,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vabd_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vabd_f32", @@ -1260,6 +1288,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vabdh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vabdl_high_s16", @@ -1596,6 +1651,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vabdq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vabdq_f32", @@ -1853,6 +1936,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vabs_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vabs_f32", @@ -2022,6 +2129,54 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vabsh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": 
"float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vabsq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FABS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vabsq_f32", @@ -2168,6 +2323,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vadd_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vadd_f32", @@ -2597,6 +2780,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vaddh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vaddhn_high_s16", @@ -3569,6 +3780,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vaddq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FADD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vaddq_f32", @@ -5949,6 +6188,39 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vbsl_f16", + "arguments": [ + "uint16x4_t a", + "float16x4_t b", + "float16x4_t c" + ], + "return_type": { + "value": "float16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "c": { + "register": "Vm.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vbsl_f32", @@ -6375,6 +6647,39 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vbslq_f16", + "arguments": [ + "uint16x8_t a", + "float16x8_t b", + "float16x8_t c" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "BSL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vbslq_f32", @@ -6801,6 +7106,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcadd_rot270_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H " + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcadd_rot270_f32", @@ -6829,6 +7162,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcadd_rot90_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H " + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcadd_rot90_f32", @@ -6857,6 +7218,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcaddq_rot270_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H " + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + 
"instructions": [ + [ + "FCADD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcaddq_rot270_f32", @@ -6912,6 +7301,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcaddq_rot90_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H " + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCADD" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcaddq_rot90_f32", @@ -6967,6 +7384,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcage_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcage_f32", @@ -7050,6 +7495,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcageh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcageq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcageq_f32", @@ -7133,6 +7633,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcagt_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" 
+ } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcagt_f32", @@ -7216,6 +7744,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcagth_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcagtq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcagtq_f32", @@ -7299,6 +7882,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcale_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcale_f32", @@ -7382,6 +7993,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcaleh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaleq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + 
"instructions": [ + [ + "FACGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcaleq_f32", @@ -7465,6 +8131,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcalt_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcalt_f32", @@ -7548,6 +8242,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcalth_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcaltq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FACGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcaltq_f32", @@ -7631,6 +8380,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vceq_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vceq_f32", @@ -8053,6 +8830,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vceqh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" 
+ ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vceqq_f32", @@ -8421,6 +9253,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vceqz_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vceqz_f32", @@ -8767,6 +9623,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vceqzh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vceqzq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMEQ" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vceqzq_f32", @@ -9067,6 +9970,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcge_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcge_f32", @@ -9432,6 +10363,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcgeh_f16", + "arguments": [ + "float16_t 
a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgeq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcgeq_f32", @@ -9743,6 +10729,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcgez_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcgez_f32", @@ -9927,6 +10937,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcgezh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgezq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcgezq_f32", @@ -10088,6 +11145,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcgt_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + 
], + "instructions": [ + [ + "FCMGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcgt_f32", @@ -10453,6 +11538,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcgth_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcgtq_f32", @@ -10764,6 +11904,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcgtz_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcgtz_f32", @@ -10948,6 +12112,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcgtzq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcgtzq_f32", @@ -11109,6 +12320,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcle_f16", + "arguments": [ + "float16x4_t a", + 
"float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcle_f32", @@ -11474,6 +12713,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcleh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcleq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcleq_f32", @@ -11785,6 +13079,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vclez_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vclez_f32", @@ -11969,6 +13287,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vclezh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vclezq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + 
"instructions": [ + [ + "FCMLE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vclezq_f32", @@ -12430,6 +13795,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vclt_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vclt_f32", @@ -12795,6 +14188,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vclth_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcltq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMGT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcltq_f32", @@ -13106,6 +14554,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcltz_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcltz_f32", @@ -13290,6 +14762,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vcltzh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcltzq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vcltzq_f32", @@ -13753,20 +15272,20 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_f32", + "name": "vcmla_f16", "arguments": [ - "float32x2_t r", - "float32x2_t a", - "float32x2_t b" + "float16x4_t r", + "float16x4_t a", + "float16x4_t b" ], "return_type": { - "value": "float32x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "r": { - "register": "Vd.2S" + "register": "Vd.4H" } }, "Architectures": [ @@ -13781,12 +15300,11 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_lane_f32", + "name": "vcmla_f32", "arguments": [ "float32x2_t r", "float32x2_t a", - "float32x2_t b", - "const int lane" + "float32x2_t b" ], "return_type": { "value": "float32x2_t" @@ -13794,10 +15312,6 @@ "Arguments_Preparation": { "a": {}, "b": {}, - "lane": { - "minimum": 0, - "maximum": 0 - }, "r": { "register": "Vd.2S" } @@ -13814,15 +15328,15 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_laneq_f32", + "name": "vcmla_lane_f16", "arguments": [ - "float32x2_t r", - "float32x2_t a", - "float32x4_t b", + "float16x4_t r", + "float16x4_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, @@ -13832,7 +15346,7 @@ "maximum": 1 }, "r": { - "register": "Vd.2S" + "register": "Vd.4H" } }, "Architectures": [ @@ -13841,18 +15355,18 @@ ], "instructions": [ [ - "DUP", "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot180_f32", + "name": "vcmla_lane_f32", "arguments": [ "float32x2_t r", "float32x2_t a", - "float32x2_t b" + "float32x2_t b", + "const int lane" ], "return_type": { "value": "float32x2_t" @@ -13860,6 +15374,10 @@ "Arguments_Preparation": { "a": {}, "b": {}, + "lane": { + "minimum": 0, + 
"maximum": 0 + }, "r": { "register": "Vd.2S" } @@ -13876,25 +15394,25 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot180_lane_f32", + "name": "vcmla_laneq_f16", "arguments": [ - "float32x2_t r", - "float32x2_t a", - "float32x2_t b", + "float16x4_t r", + "float16x4_t a", + "float16x8_t b", "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 }, "r": { - "register": "Vd.2S" + "register": "Vd.4H" } }, "Architectures": [ @@ -13909,7 +15427,7 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot180_laneq_f32", + "name": "vcmla_laneq_f32", "arguments": [ "float32x2_t r", "float32x2_t a", @@ -13943,20 +15461,20 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot270_f32", + "name": "vcmla_rot180_f16", "arguments": [ - "float32x2_t r", - "float32x2_t a", - "float32x2_t b" + "float16x4_t r", + "float16x4_t a", + "float16x4_t b" ], "return_type": { - "value": "float32x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "r": { - "register": "Vd.2S" + "register": "Vd.4H" } }, "Architectures": [ @@ -13971,12 +15489,11 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot270_lane_f32", + "name": "vcmla_rot180_f32", "arguments": [ "float32x2_t r", "float32x2_t a", - "float32x2_t b", - "const int lane" + "float32x2_t b" ], "return_type": { "value": "float32x2_t" @@ -13984,10 +15501,6 @@ "Arguments_Preparation": { "a": {}, "b": {}, - "lane": { - "minimum": 0, - "maximum": 0 - }, "r": { "register": "Vd.2S" } @@ -14004,15 +15517,15 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot270_laneq_f32", + "name": "vcmla_rot180_lane_f16", "arguments": [ - "float32x2_t r", - "float32x2_t a", - "float32x4_t b", + "float16x4_t r", + "float16x4_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, @@ -14022,7 +15535,7 @@ "maximum": 1 }, "r": { - 
"register": "Vd.2S" + "register": "Vd.4H" } }, "Architectures": [ @@ -14031,18 +15544,18 @@ ], "instructions": [ [ - "DUP", "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot90_f32", + "name": "vcmla_rot180_lane_f32", "arguments": [ "float32x2_t r", "float32x2_t a", - "float32x2_t b" + "float32x2_t b", + "const int lane" ], "return_type": { "value": "float32x2_t" @@ -14050,6 +15563,10 @@ "Arguments_Preparation": { "a": {}, "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, "r": { "register": "Vd.2S" } @@ -14066,25 +15583,25 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot90_lane_f32", + "name": "vcmla_rot180_laneq_f16", "arguments": [ - "float32x2_t r", - "float32x2_t a", - "float32x2_t b", + "float16x4_t r", + "float16x4_t a", + "float16x8_t b", "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 }, "r": { - "register": "Vd.2S" + "register": "Vd.4H" } }, "Architectures": [ @@ -14099,7 +15616,7 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmla_rot90_laneq_f32", + "name": "vcmla_rot180_laneq_f32", "arguments": [ "float32x2_t r", "float32x2_t a", @@ -14133,20 +15650,20 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_f32", + "name": "vcmla_rot270_f16", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x4_t b" + "float16x4_t r", + "float16x4_t a", + "float16x4_t b" ], "return_type": { - "value": "float32x4_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "r": { - "register": "Vd.4S" + "register": "Vd.4H" } }, "Architectures": [ @@ -14161,23 +15678,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_f64", + "name": "vcmla_rot270_f32", "arguments": [ - "float64x2_t r", - "float64x2_t a", - "float64x2_t b" + "float32x2_t r", + "float32x2_t a", + "float32x2_t b" ], "return_type": { - "value": "float64x2_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "r": { - 
"register": "Vd.2D" + "register": "Vd.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ @@ -14188,15 +15706,48 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_lane_f32", + "name": "vcmla_rot270_lane_f16", "arguments": [ - "float32x4_t r", - "float32x4_t a", + "float16x4_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCMLA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcmla_rot270_lane_f32", + "arguments": [ + "float32x2_t r", + "float32x2_t a", "float32x2_t b", "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": {}, @@ -14206,7 +15757,7 @@ "maximum": 0 }, "r": { - "register": "Vd.4S" + "register": "Vd.2S" } }, "Architectures": [ @@ -14221,15 +15772,15 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_laneq_f32", + "name": "vcmla_rot270_laneq_f16", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x4_t b", + "float16x4_t r", + "float16x4_t a", + "float16x8_t b", "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, @@ -14239,7 +15790,7 @@ "maximum": 1 }, "r": { - "register": "Vd.4S" + "register": "Vd.4H" } }, "Architectures": [ @@ -14254,20 +15805,25 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot180_f32", + "name": "vcmla_rot270_laneq_f32", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x4_t b" + "float32x2_t r", + "float32x2_t a", + "float32x4_t b", + "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": {}, "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, "r": { - "register": "Vd.4S" + "register": "Vd.2S" } }, "Architectures": [ @@ 
-14276,29 +15832,31 @@ ], "instructions": [ [ + "DUP", "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot180_f64", + "name": "vcmla_rot90_f16", "arguments": [ - "float64x2_t r", - "float64x2_t a", - "float64x2_t b" + "float16x4_t r", + "float16x4_t a", + "float16x4_t b" ], "return_type": { - "value": "float64x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "r": { - "register": "Vd.2D" + "register": "Vd.4H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ @@ -14309,25 +15867,20 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot180_lane_f32", + "name": "vcmla_rot90_f32", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x2_t b", - "const int lane" + "float32x2_t r", + "float32x2_t a", + "float32x2_t b" ], "return_type": { - "value": "float32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": {}, "b": {}, - "lane": { - "minimum": 0, - "maximum": 0 - }, "r": { - "register": "Vd.4S" + "register": "Vd.2S" } }, "Architectures": [ @@ -14342,15 +15895,15 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot180_laneq_f32", + "name": "vcmla_rot90_lane_f16", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x4_t b", + "float16x4_t r", + "float16x4_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, @@ -14360,7 +15913,7 @@ "maximum": 1 }, "r": { - "register": "Vd.4S" + "register": "Vd.4H" } }, "Architectures": [ @@ -14375,20 +15928,25 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot270_f32", + "name": "vcmla_rot90_lane_f32", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x4_t b" + "float32x2_t r", + "float32x2_t a", + "float32x2_t b", + "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": {}, "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 + }, "r": { - "register": "Vd.4S" + "register": "Vd.2S" } }, 
"Architectures": [ @@ -14403,23 +15961,29 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot270_f64", + "name": "vcmla_rot90_laneq_f16", "arguments": [ - "float64x2_t r", - "float64x2_t a", - "float64x2_t b" + "float16x4_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": {}, "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, "r": { - "register": "Vd.2D" + "register": "Vd.4H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ @@ -14430,25 +15994,25 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot270_lane_f32", + "name": "vcmla_rot90_laneq_f32", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x2_t b", + "float32x2_t r", + "float32x2_t a", + "float32x4_t b", "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 }, "r": { - "register": "Vd.4S" + "register": "Vd.2S" } }, "Architectures": [ @@ -14457,31 +16021,27 @@ ], "instructions": [ [ + "DUP", "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot270_laneq_f32", + "name": "vcmlaq_f16", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x4_t b", - "const int lane" + "float16x8_t r", + "float16x8_t a", + "float16x8_t b" ], "return_type": { - "value": "float32x4_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": {}, "b": {}, - "lane": { - "minimum": 0, - "maximum": 1 - }, "r": { - "register": "Vd.4S" + "register": "Vd.8H" } }, "Architectures": [ @@ -14496,7 +16056,7 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot90_f32", + "name": "vcmlaq_f32", "arguments": [ "float32x4_t r", "float32x4_t a", @@ -14524,7 +16084,7 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot90_f64", + "name": "vcmlaq_f64", "arguments": [ "float64x2_t r", "float64x2_t a", @@ -14551,25 +16111,25 @@ }, { "SIMD_ISA": "Neon", - "name": 
"vcmlaq_rot90_lane_f32", + "name": "vcmlaq_lane_f16", "arguments": [ - "float32x4_t r", - "float32x4_t a", - "float32x2_t b", + "float16x8_t r", + "float16x8_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": {}, "b": {}, "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 }, "r": { - "register": "Vd.4S" + "register": "Vd.8H" } }, "Architectures": [ @@ -14584,11 +16144,11 @@ }, { "SIMD_ISA": "Neon", - "name": "vcmlaq_rot90_laneq_f32", + "name": "vcmlaq_lane_f32", "arguments": [ "float32x4_t r", "float32x4_t a", - "float32x4_t b", + "float32x2_t b", "const int lane" ], "return_type": { @@ -14599,7 +16159,7 @@ "b": {}, "lane": { "minimum": 0, - "maximum": 1 + "maximum": 0 }, "r": { "register": "Vd.4S" @@ -14617,258 +16177,301 @@ }, { "SIMD_ISA": "Neon", - "name": "vcnt_p8", + "name": "vcmlaq_laneq_f16", "arguments": [ - "poly8x8_t a" + "float16x8_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "poly8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.8B" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "CNT" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcnt_s8", + "name": "vcmlaq_laneq_f32", "arguments": [ - "int8x8_t a" + "float32x4_t r", + "float32x4_t a", + "float32x4_t b", + "const int lane" ], "return_type": { - "value": "int8x8_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.8B" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "CNT" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcnt_u8", + "name": "vcmlaq_rot180_f16", "arguments": [ - "uint8x8_t a" + "float16x8_t r", + "float16x8_t a", + "float16x8_t 
b" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.8B" + "a": {}, + "b": {}, + "r": { + "register": "Vd.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "CNT" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcntq_p8", + "name": "vcmlaq_rot180_f32", "arguments": [ - "poly8x16_t a" + "float32x4_t r", + "float32x4_t a", + "float32x4_t b" ], "return_type": { - "value": "poly8x16_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "CNT" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcntq_s8", + "name": "vcmlaq_rot180_f64", "arguments": [ - "int8x16_t a" + "float64x2_t r", + "float64x2_t a", + "float64x2_t b" ], "return_type": { - "value": "int8x16_t" + "value": "float64x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" + "a": {}, + "b": {}, + "r": { + "register": "Vd.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "CNT" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcntq_u8", + "name": "vcmlaq_rot180_lane_f16", "arguments": [ - "uint8x16_t a" + "float16x8_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" ], "return_type": { - "value": "uint8x16_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "CNT" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_f32", + "name": "vcmlaq_rot180_lane_f32", "arguments": [ - "float32x2_t low", - "float32x2_t high" + "float32x4_t r", + "float32x4_t a", + "float32x2_t b", + "const int lane" ], "return_type": { "value": "float32x4_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.2S" + 
"a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 }, - "low": { - "register": "Vn.2S" + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_f64", + "name": "vcmlaq_rot180_laneq_f16", "arguments": [ - "float64x1_t low", - "float64x1_t high" + "float16x8_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.1D" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 }, - "low": { - "register": "Vn.1D" + "r": { + "register": "Vd.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_p16", + "name": "vcmlaq_rot180_laneq_f32", "arguments": [ - "poly16x4_t low", - "poly16x4_t high" + "float32x4_t r", + "float32x4_t a", + "float32x4_t b", + "const int lane" ], "return_type": { - "value": "poly16x8_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.4H" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 }, - "low": { - "register": "Vn.4H" + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_p64", + "name": "vcmlaq_rot270_f16", "arguments": [ - "poly64x1_t low", - "poly64x1_t high" + "float16x8_t r", + "float16x8_t a", + "float16x8_t b" ], "return_type": { - "value": "poly64x2_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.1D" - }, - "low": { - "register": "Vn.1D" + "a": {}, + "b": {}, + "r": { + "register": "Vd.8H" } }, "Architectures": [ @@ -14877,418 +16480,400 @@ ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_p8", + "name": "vcmlaq_rot270_f32", 
"arguments": [ - "poly8x8_t low", - "poly8x8_t high" + "float32x4_t r", + "float32x4_t a", + "float32x4_t b" ], "return_type": { - "value": "poly8x16_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.8B" - }, - "low": { - "register": "Vn.8B" + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_s16", + "name": "vcmlaq_rot270_f64", "arguments": [ - "int16x4_t low", - "int16x4_t high" + "float64x2_t r", + "float64x2_t a", + "float64x2_t b" ], "return_type": { - "value": "int16x8_t" + "value": "float64x2_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.4H" - }, - "low": { - "register": "Vn.4H" + "a": {}, + "b": {}, + "r": { + "register": "Vd.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_s32", + "name": "vcmlaq_rot270_lane_f16", "arguments": [ - "int32x2_t low", - "int32x2_t high" + "float16x8_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.2S" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 }, - "low": { - "register": "Vn.2S" + "r": { + "register": "Vd.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_s64", + "name": "vcmlaq_rot270_lane_f32", "arguments": [ - "int64x1_t low", - "int64x1_t high" + "float32x4_t r", + "float32x4_t a", + "float32x2_t b", + "const int lane" ], "return_type": { - "value": "int64x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.1D" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 0 }, - "low": { - "register": "Vn.1D" + "r": { + "register": 
"Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_s8", + "name": "vcmlaq_rot270_laneq_f16", "arguments": [ - "int8x8_t low", - "int8x8_t high" + "float16x8_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "int8x16_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.8B" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 3 }, - "low": { - "register": "Vn.8B" + "r": { + "register": "Vd.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_u16", + "name": "vcmlaq_rot270_laneq_f32", "arguments": [ - "uint16x4_t low", - "uint16x4_t high" + "float32x4_t r", + "float32x4_t a", + "float32x4_t b", + "const int lane" ], "return_type": { - "value": "uint16x8_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.4H" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 1 }, - "low": { - "register": "Vn.4H" + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_u32", + "name": "vcmlaq_rot90_f16", "arguments": [ - "uint32x2_t low", - "uint32x2_t high" + "float16x8_t r", + "float16x8_t a", + "float16x8_t b" ], "return_type": { - "value": "uint32x4_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.2S" - }, - "low": { - "register": "Vn.2S" + "a": {}, + "b": {}, + "r": { + "register": "Vd.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_u64", + "name": "vcmlaq_rot90_f32", "arguments": [ - "uint64x1_t low", - "uint64x1_t high" + "float32x4_t r", + "float32x4_t a", + "float32x4_t b" ], "return_type": { - 
"value": "uint64x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.1D" - }, - "low": { - "register": "Vn.1D" + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcombine_u8", + "name": "vcmlaq_rot90_f64", "arguments": [ - "uint8x8_t low", - "uint8x8_t high" + "float64x2_t r", + "float64x2_t a", + "float64x2_t b" ], "return_type": { - "value": "uint8x16_t" + "value": "float64x2_t" }, "Arguments_Preparation": { - "high": { - "register": "Vm.8B" - }, - "low": { - "register": "Vn.8B" + "a": {}, + "b": {}, + "r": { + "register": "Vd.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP", - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_f32", + "name": "vcmlaq_rot90_lane_f16", "arguments": [ - "float32x2_t a", - "const int lane1", - "float32x2_t b", - "const int lane2" + "float16x8_t r", + "float16x8_t a", + "float16x4_t b", + "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.2S" - }, - "b": { - "register": "Vn.2S" - }, - "lane1": { + "a": {}, + "b": {}, + "lane": { "minimum": 0, "maximum": 1 }, - "lane2": { - "minimum": 0, - "maximum": 1 + "r": { + "register": "Vd.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_f64", + "name": "vcmlaq_rot90_lane_f32", "arguments": [ - "float64x1_t a", - "const int lane1", - "float64x1_t b", - "const int lane2" + "float32x4_t r", + "float32x4_t a", + "float32x2_t b", + "const int lane" ], "return_type": { - "value": "float64x1_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "UNUSED" - }, - "b": { - "register": "Vn.1D" - }, - "lane1": { + "a": {}, + "b": {}, + "lane": { "minimum": 0, "maximum": 0 }, - "lane2": 
{ - "minimum": 0, - "maximum": 0 + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "DUP" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_p16", + "name": "vcmlaq_rot90_laneq_f16", "arguments": [ - "poly16x4_t a", - "const int lane1", - "poly16x4_t b", - "const int lane2" + "float16x8_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "poly16x4_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.4H" - }, - "b": { - "register": "Vn.4H" - }, - "lane1": { + "a": {}, + "b": {}, + "lane": { "minimum": 0, "maximum": 3 }, - "lane2": { - "minimum": 0, - "maximum": 3 + "r": { + "register": "Vd.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_p64", + "name": "vcmlaq_rot90_laneq_f32", "arguments": [ - "poly64x1_t a", - "const int lane1", - "poly64x1_t b", - "const int lane2" + "float32x4_t r", + "float32x4_t a", + "float32x4_t b", + "const int lane" ], "return_type": { - "value": "poly64x1_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "UNUSED" - }, - "b": { - "register": "Vn.1D" - }, - "lane1": { + "a": {}, + "b": {}, + "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 }, - "lane2": { - "minimum": 0, - "maximum": 0 + "r": { + "register": "Vd.4S" } }, "Architectures": [ @@ -15297,258 +16882,4873 @@ ], "instructions": [ [ - "DUP" + "FCMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_p8", + "name": "vcnt_p8", "arguments": [ - "poly8x8_t a", - "const int lane1", - "poly8x8_t b", - "const int lane2" + "poly8x8_t a" ], "return_type": { "value": "poly8x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" - }, - "b": { "register": "Vn.8B" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 7 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "INS" + 
"CNT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_s16", + "name": "vcnt_s8", "arguments": [ - "int16x4_t a", - "const int lane1", - "int16x4_t b", - "const int lane2" + "int8x8_t a" ], "return_type": { - "value": "int16x4_t" + "value": "int8x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4H" + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcnt_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcntq_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcntq_s8", + "arguments": [ + "int8x16_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcntq_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "CNT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_f16", + "arguments": [ + "float16x4_t low", + "float16x4_t high" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.4H" }, - "b": { + "low": { "register": "Vn.4H" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - 
"maximum": 3 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ + "DUP", "INS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_s32", + "name": "vcombine_f32", "arguments": [ - "int32x2_t a", - "const int lane1", - "int32x2_t b", - "const int lane2" + "float32x2_t low", + "float32x2_t high" ], "return_type": { - "value": "int32x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.2S" + "high": { + "register": "Vm.2S" }, - "b": { + "low": { "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_f64", + "arguments": [ + "float64x1_t low", + "float64x1_t high" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.1D" }, - "lane1": { - "minimum": 0, - "maximum": 1 + "low": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_p16", + "arguments": [ + "poly16x4_t low", + "poly16x4_t high" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.4H" }, - "lane2": { - "minimum": 0, - "maximum": 1 + "low": { + "register": "Vn.4H" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ + "DUP", "INS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_s64", + "name": "vcombine_p64", "arguments": [ - "int64x1_t a", - "const int lane1", - "int64x1_t b", - "const int lane2" + "poly64x1_t low", + "poly64x1_t high" ], "return_type": { - "value": "int64x1_t" + "value": "poly64x2_t" }, "Arguments_Preparation": { - "a": { - "register": "UNUSED" + "high": { + "register": "Vm.1D" }, - "b": { + "low": { "register": "Vn.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcombine_p8", + "arguments": [ + "poly8x8_t low", + "poly8x8_t high" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.8B" }, - "lane1": { - "minimum": 0, - "maximum": 0 + "low": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_s16", + "arguments": [ + "int16x4_t low", + "int16x4_t high" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.4H" }, - "lane2": { - "minimum": 0, - "maximum": 0 + "low": { + "register": "Vn.4H" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "DUP", + "INS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_s8", + "name": "vcombine_s32", "arguments": [ - "int8x8_t a", - "const int lane1", - "int8x8_t b", - "const int lane2" + "int32x2_t low", + "int32x2_t high" ], "return_type": { - "value": "int8x8_t" + "value": "int32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.8B" + "high": { + "register": "Vm.2S" }, - "b": { - "register": "Vn.8B" + "low": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_s64", + "arguments": [ + "int64x1_t low", + "int64x1_t high" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.1D" }, - "lane1": { - "minimum": 0, - "maximum": 7 + "low": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_s8", + "arguments": [ + "int8x8_t low", + "int8x8_t high" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.8B" }, - "lane2": { - "minimum": 0, - 
"maximum": 7 + "low": { + "register": "Vn.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ + "DUP", "INS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_u16", + "name": "vcombine_u16", "arguments": [ - "uint16x4_t a", - "const int lane1", - "uint16x4_t b", - "const int lane2" + "uint16x4_t low", + "uint16x4_t high" ], "return_type": { - "value": "uint16x4_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.4H" + "high": { + "register": "Vm.4H" }, - "b": { + "low": { "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_u32", + "arguments": [ + "uint32x2_t low", + "uint32x2_t high" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.2S" }, - "lane1": { - "minimum": 0, - "maximum": 3 + "low": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_u64", + "arguments": [ + "uint64x1_t low", + "uint64x1_t high" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.1D" }, - "lane2": { - "minimum": 0, - "maximum": 3 + "low": { + "register": "Vn.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcombine_u8", + "arguments": [ + "uint8x8_t low", + "uint8x8_t high" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "high": { + "register": "Vm.8B" + }, + "low": { + "register": "Vn.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP", + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_f32", + "arguments": [ + "float32x2_t a", + "const int lane1", + 
"float32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_f64", + "arguments": [ + "float64x1_t a", + "const int lane1", + "float64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_p16", + "arguments": [ + "poly16x4_t a", + "const int lane1", + "poly16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_p64", + "arguments": [ + "poly64x1_t a", + "const int lane1", + "poly64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_p8", + "arguments": [ + "poly8x8_t a", + "const int lane1", + "poly8x8_t b", + "const int lane2" + ], + 
"return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_s16", + "arguments": [ + "int16x4_t a", + "const int lane1", + "int16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_s32", + "arguments": [ + "int32x2_t a", + "const int lane1", + "int32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_s64", + "arguments": [ + "int64x1_t a", + "const int lane1", + "int64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_s8", + "arguments": [ + "int8x8_t a", + "const int lane1", + "int8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { 
+ "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_u16", + "arguments": [ + "uint16x4_t a", + "const int lane1", + "uint16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_u32", + "arguments": [ + "uint32x2_t a", + "const int lane1", + "uint32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_u64", + "arguments": [ + "uint64x1_t a", + "const int lane1", + "uint64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_lane_u8", + "arguments": [ + "uint8x8_t a", + "const int lane1", + "uint8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.8B" + }, + 
"lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_f32", + "arguments": [ + "float32x2_t a", + "const int lane1", + "float32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_f64", + "arguments": [ + "float64x1_t a", + "const int lane1", + "float64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_p16", + "arguments": [ + "poly16x4_t a", + "const int lane1", + "poly16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_p64", + "arguments": [ + "poly64x1_t a", + "const int lane1", + "poly64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + 
"lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_p8", + "arguments": [ + "poly8x8_t a", + "const int lane1", + "poly8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_s16", + "arguments": [ + "int16x4_t a", + "const int lane1", + "int16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_s32", + "arguments": [ + "int32x2_t a", + "const int lane1", + "int32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_s64", + "arguments": [ + "int64x1_t a", + "const int lane1", + "int64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_s8", + "arguments": [ + "int8x8_t a", + "const int lane1", + "int8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_u16", + "arguments": [ + "uint16x4_t a", + "const int lane1", + "uint16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_u32", + "arguments": [ + "uint32x2_t a", + "const int lane1", + "uint32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_u64", + "arguments": [ + "uint64x1_t a", + "const int lane1", + "uint64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "UNUSED" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 0 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "DUP" + 
] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopy_laneq_u8", + "arguments": [ + "uint8x8_t a", + "const int lane1", + "uint8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_f32", + "arguments": [ + "float32x4_t a", + "const int lane1", + "float32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_f64", + "arguments": [ + "float64x2_t a", + "const int lane1", + "float64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_p16", + "arguments": [ + "poly16x8_t a", + "const int lane1", + "poly16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcopyq_lane_p64", + "arguments": [ + "poly64x2_t a", + "const int lane1", + "poly64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_p8", + "arguments": [ + "poly8x16_t a", + "const int lane1", + "poly8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_s16", + "arguments": [ + "int16x8_t a", + "const int lane1", + "int16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_s32", + "arguments": [ + "int32x4_t a", + "const int lane1", + "int32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_s64", + "arguments": [ + "int64x2_t a", + 
"const int lane1", + "int64x1_t b", + "const int lane2" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_s8", + "arguments": [ + "int8x16_t a", + "const int lane1", + "int8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_u16", + "arguments": [ + "uint16x8_t a", + "const int lane1", + "uint16x4_t b", + "const int lane2" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.4H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_u32", + "arguments": [ + "uint32x4_t a", + "const int lane1", + "uint32x2_t b", + "const int lane2" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.2S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_u64", + "arguments": [ + "uint64x2_t a", + "const int lane1", + "uint64x1_t b", + "const int lane2" + ], + 
"return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.1D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 0 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_lane_u8", + "arguments": [ + "uint8x16_t a", + "const int lane1", + "uint8x8_t b", + "const int lane2" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.8B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_f32", + "arguments": [ + "float32x4_t a", + "const int lane1", + "float32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_f64", + "arguments": [ + "float64x2_t a", + "const int lane1", + "float64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_p16", + "arguments": [ + "poly16x8_t a", + "const int lane1", + "poly16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "poly16x8_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_p64", + "arguments": [ + "poly64x2_t a", + "const int lane1", + "poly64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_p8", + "arguments": [ + "poly8x16_t a", + "const int lane1", + "poly8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_s16", + "arguments": [ + "int16x8_t a", + "const int lane1", + "int16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_s32", + "arguments": [ + "int32x4_t a", + "const int lane1", + "int32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": 
"Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { + "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_s64", + "arguments": [ + "int64x2_t a", + "const int lane1", + "int64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_s8", + "arguments": [ + "int8x16_t a", + "const int lane1", + "int8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_u16", + "arguments": [ + "uint16x8_t a", + "const int lane1", + "uint16x8_t b", + "const int lane2" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane1": { + "minimum": 0, + "maximum": 7 + }, + "lane2": { + "minimum": 0, + "maximum": 7 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_u32", + "arguments": [ + "uint32x4_t a", + "const int lane1", + "uint32x4_t b", + "const int lane2" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "lane1": { 
+ "minimum": 0, + "maximum": 3 + }, + "lane2": { + "minimum": 0, + "maximum": 3 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_u64", + "arguments": [ + "uint64x2_t a", + "const int lane1", + "uint64x2_t b", + "const int lane2" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + }, + "b": { + "register": "Vn.2D" + }, + "lane1": { + "minimum": 0, + "maximum": 1 + }, + "lane2": { + "minimum": 0, + "maximum": 1 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcopyq_laneq_u8", + "arguments": [ + "uint8x16_t a", + "const int lane1", + "uint8x16_t b", + "const int lane2" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + }, + "b": { + "register": "Vn.16B" + }, + "lane1": { + "minimum": 0, + "maximum": 15 + }, + "lane2": { + "minimum": 0, + "maximum": 15 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_f16", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_f32", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_f64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ 
+ "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_p16", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_p64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_p8", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_s16", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_s32", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_s64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_s8", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_u16", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_u32", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_u64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcreate_u8", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "INS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f16_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f16_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f16_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { 
+ "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f32_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f32_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f32_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f32_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f64_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_f64_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": 
"Neon", + "name": "vcvt_f64_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_high_f16_f32", + "arguments": [ + "float16x4_t r", + "float32x4_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "r": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_high_f32_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_high_f32_f64", + "arguments": [ + "float32x2_t r", + "float64x2_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + }, + "r": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTN2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_high_f64_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTL2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f16_s16", + "arguments": [ + "int16x4_t a", + "const int n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f16_u16", + "arguments": 
[ + "uint16x4_t a", + "const int n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f32_s32", + "arguments": [ + "int32x2_t a", + "const int n" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f32_u32", + "arguments": [ + "uint32x2_t a", + "const int n" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f64_s64", + "arguments": [ + "int64x1_t a", + "const int n" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_f64_u64", + "arguments": [ + "uint64x1_t a", + "const int n" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_s16_f16", + "arguments": [ + "float16x4_t a", + "const int n" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + 
"maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_s32_f32", + "arguments": [ + "float32x2_t a", + "const int n" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_s64_f64", + "arguments": [ + "float64x1_t a", + "const int n" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_u16_f16", + "arguments": [ + "float16x4_t a", + "const int n" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_u32_f32", + "arguments": [ + "float32x2_t a", + "const int n" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "n": { + "minimum": 1, + "maximum": 32 + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_n_u64_f64", + "arguments": [ + "float64x1_t a", + "const int n" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_s16_f16", + "arguments": [ + 
"float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_s32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_s64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_u32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvt_u64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_s16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + 
"FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_s32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_s64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_u32_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvta_u64_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtad_s64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtad_u64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + 
"Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_s16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_s32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_s64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_u16_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_u32_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtah_u64_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_s16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + 
"register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_s32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_s64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_u16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_u32_f32", + "arguments": [ + "float32x4_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtaq_u64_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtas_s32_f32", + "arguments": [ + "float32_t a" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtas_u32_f32", + "arguments": [ + "float32_t a" + ], + 
"return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Sn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTAU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_f64_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_f64_u64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_n_f64_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_n_f64_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "float64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_n_s64_f64", + "arguments": [ + "float64_t a", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_n_u64_f64", + "arguments": [ + "float64_t a", + "const int n" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { 
+ "register": "Dn" + }, + "n": { + "minimum": 1, + "maximum": 64 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_s64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvtd_u64_f64", + "arguments": [ + "float64_t a" + ], + "return_type": { + "value": "uint64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Dn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_s16", + "arguments": [ + "int16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_s32", + "arguments": [ + "int32_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_s64", + "arguments": [ + "int64_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_u16", + "arguments": [ + "uint16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_u32", + "arguments": [ + "uint32_t a" + ], + "return_type": { + "value": "float16_t" 
+ }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_f16_u64", + "arguments": [ + "uint64_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_s16", + "arguments": [ + "int16_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_s32", + "arguments": [ + "int32_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_s64", + "arguments": [ + "int64_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "SCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_u16", + "arguments": [ + "uint16_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_u32", + "arguments": [ + "uint32_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + 
}, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_f16_u64", + "arguments": [ + "uint64_t a", + "const int n" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UCVTF" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_s16_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "int16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_s32_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "int32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_s64_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "int64_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vcvth_n_u16_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "uint16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FCVTZU" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": 
"vcvth_n_u32_f16", + "arguments": [ + "float16_t a", + "const int n" + ], + "return_type": { + "value": "uint32_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "n": { + "minimum": 1, + "maximum": 16 } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_u32", + "name": "vcvth_n_u64_f16", "arguments": [ - "uint32x2_t a", - "const int lane1", - "uint32x2_t b", - "const int lane2" + "float16_t a", + "const int n" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2S" - }, - "b": { - "register": "Vn.2S" - }, - "lane1": { - "minimum": 0, - "maximum": 1 + "register": "Hn" }, - "lane2": { - "minimum": 0, - "maximum": 1 + "n": { + "minimum": 1, + "maximum": 16 } }, "Architectures": [ @@ -15556,36 +21756,22 @@ ], "instructions": [ [ - "INS" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_u64", + "name": "vcvth_s16_f16", "arguments": [ - "uint64x1_t a", - "const int lane1", - "uint64x1_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "int16_t" }, "Arguments_Preparation": { "a": { - "register": "UNUSED" - }, - "b": { - "register": "Vn.1D" - }, - "lane1": { - "minimum": 0, - "maximum": 0 - }, - "lane2": { - "minimum": 0, - "maximum": 0 + "register": "Hn" } }, "Architectures": [ @@ -15593,73 +21779,46 @@ ], "instructions": [ [ - "DUP" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_lane_u8", + "name": "vcvth_s32_f16", "arguments": [ - "uint8x8_t a", - "const int lane1", - "uint8x8_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" - }, - "b": { - "register": "Vn.8B" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Hn" } }, "Architectures": [ + "A32", "A64" ], 
"instructions": [ [ - "INS" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_f32", + "name": "vcvth_s64_f16", "arguments": [ - "float32x2_t a", - "const int lane1", - "float32x4_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "float32x2_t" + "value": "int64_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2S" - }, - "b": { - "register": "Vn.4S" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "register": "Hn" } }, "Architectures": [ @@ -15667,36 +21826,22 @@ ], "instructions": [ [ - "INS" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_f64", + "name": "vcvth_u16_f16", "arguments": [ - "float64x1_t a", - "const int lane1", - "float64x2_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "float64x1_t" + "value": "uint16_t" }, "Arguments_Preparation": { "a": { - "register": "UNUSED" - }, - "b": { - "register": "Vn.2D" - }, - "lane1": { - "minimum": 0, - "maximum": 0 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Hn" } }, "Architectures": [ @@ -15704,185 +21849,117 @@ ], "instructions": [ [ - "DUP" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_p16", + "name": "vcvth_u32_f16", "arguments": [ - "poly16x4_t a", - "const int lane1", - "poly16x8_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "poly16x4_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4H" - }, - "b": { - "register": "Vn.8H" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Hn" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_p64", + "name": "vcvth_u64_f16", "arguments": [ - "poly64x1_t a", - "const int lane1", - "poly64x2_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "poly64x1_t" + "value": "uint64_t" }, 
"Arguments_Preparation": { "a": { - "register": "UNUSED" - }, - "b": { - "register": "Vn.2D" - }, - "lane1": { - "minimum": 0, - "maximum": 0 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Hn" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "DUP" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_p8", + "name": "vcvtm_s16_f16", "arguments": [ - "poly8x8_t a", - "const int lane1", - "poly8x16_t b", - "const int lane2" + "float16x4_t a" ], "return_type": { - "value": "poly8x8_t" + "value": "int16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" - }, - "b": { - "register": "Vn.16B" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 15 + "register": "Vn.4H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_s16", + "name": "vcvtm_s32_f32", "arguments": [ - "int16x4_t a", - "const int lane1", - "int16x8_t b", - "const int lane2" + "float32x2_t a" ], "return_type": { - "value": "int16x4_t" + "value": "int32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4H" - }, - "b": { - "register": "Vn.8H" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Vn.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_s32", + "name": "vcvtm_s64_f64", "arguments": [ - "int32x2_t a", - "const int lane1", - "int32x4_t b", - "const int lane2" + "float64x1_t a" ], "return_type": { - "value": "int32x2_t" + "value": "int64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2S" - }, - "b": { - "register": "Vn.4S" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "register": "Dn" } }, "Architectures": [ @@ -15890,110 +21967,70 @@ ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - 
"name": "vcopy_laneq_s64", + "name": "vcvtm_u16_f16", "arguments": [ - "int64x1_t a", - "const int lane1", - "int64x2_t b", - "const int lane2" + "float16x4_t a" ], "return_type": { - "value": "int64x1_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { "a": { - "register": "UNUSED" - }, - "b": { - "register": "Vn.2D" - }, - "lane1": { - "minimum": 0, - "maximum": 0 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Vn.4H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "DUP" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_s8", + "name": "vcvtm_u32_f32", "arguments": [ - "int8x8_t a", - "const int lane1", - "int8x16_t b", - "const int lane2" + "float32x2_t a" ], "return_type": { - "value": "int8x8_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" - }, - "b": { - "register": "Vn.16B" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 15 + "register": "Vn.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_u16", + "name": "vcvtm_u64_f64", "arguments": [ - "uint16x4_t a", - "const int lane1", - "uint16x8_t b", - "const int lane2" + "float64x1_t a" ], "return_type": { - "value": "uint16x4_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4H" - }, - "b": { - "register": "Vn.8H" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Dn" } }, "Architectures": [ @@ -16001,36 +22038,22 @@ ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_u32", + "name": "vcvtmd_s64_f64", "arguments": [ - "uint32x2_t a", - "const int lane1", - "uint32x4_t b", - "const int lane2" + "float64_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "int64_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2S" - }, - "b": { - "register": 
"Vn.4S" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "register": "Dn" } }, "Architectures": [ @@ -16038,36 +22061,22 @@ ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_u64", + "name": "vcvtmd_u64_f64", "arguments": [ - "uint64x1_t a", - "const int lane1", - "uint64x2_t b", - "const int lane2" + "float64_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint64_t" }, "Arguments_Preparation": { "a": { - "register": "UNUSED" - }, - "b": { - "register": "Vn.2D" - }, - "lane1": { - "minimum": 0, - "maximum": 0 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Dn" } }, "Architectures": [ @@ -16075,36 +22084,22 @@ ], "instructions": [ [ - "DUP" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopy_laneq_u8", + "name": "vcvtmh_s16_f16", "arguments": [ - "uint8x8_t a", - "const int lane1", - "uint8x16_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "int16_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" - }, - "b": { - "register": "Vn.16B" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 15 + "register": "Hn" } }, "Architectures": [ @@ -16112,73 +22107,46 @@ ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_f32", + "name": "vcvtmh_s32_f16", "arguments": [ - "float32x4_t a", - "const int lane1", - "float32x2_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "float32x4_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.2S" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Hn" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_f64", + "name": "vcvtmh_s64_f16", "arguments": [ 
- "float64x2_t a", - "const int lane1", - "float64x1_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "float64x2_t" + "value": "int64_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.1D" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 0 + "register": "Hn" } }, "Architectures": [ @@ -16186,36 +22154,22 @@ ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_p16", + "name": "vcvtmh_u16_f16", "arguments": [ - "poly16x8_t a", - "const int lane1", - "poly16x4_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "poly16x8_t" + "value": "uint16_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8H" - }, - "b": { - "register": "Vn.4H" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "register": "Hn" } }, "Architectures": [ @@ -16223,36 +22177,22 @@ ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_p64", + "name": "vcvtmh_u32_f16", "arguments": [ - "poly64x2_t a", - "const int lane1", - "poly64x1_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "poly64x2_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.1D" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 0 + "register": "Hn" } }, "Architectures": [ @@ -16261,36 +22201,22 @@ ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_p8", + "name": "vcvtmh_u64_f16", "arguments": [ - "poly8x16_t a", - "const int lane1", - "poly8x8_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "poly8x16_t" + "value": "uint64_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.16B" - }, - "b": { - "register": "Vn.8B" - }, - "lane1": { - "minimum": 0, - "maximum": 15 - 
}, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Hn" } }, "Architectures": [ @@ -16298,110 +22224,70 @@ ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_s16", + "name": "vcvtmq_s16_f16", "arguments": [ - "int16x8_t a", - "const int lane1", - "int16x4_t b", - "const int lane2" - ], - "return_type": { - "value": "int16x8_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vd.8H" - }, - "b": { - "register": "Vn.4H" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "float16x8_t a" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_s32", + "name": "vcvtmq_s32_f32", "arguments": [ - "int32x4_t a", - "const int lane1", - "int32x2_t b", - "const int lane2" + "float32x4_t a" ], "return_type": { "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.2S" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Vn.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_s64", + "name": "vcvtmq_s64_f64", "arguments": [ - "int64x2_t a", - "const int lane1", - "int64x1_t b", - "const int lane2" + "float64x2_t a" ], "return_type": { "value": "int64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.1D" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 0 + "register": "Vn.2D" } }, "Architectures": [ @@ -16409,110 +22295,70 @@ ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_s8", + "name": "vcvtmq_u16_f16", "arguments": [ - "int8x16_t a", - "const int 
lane1", - "int8x8_t b", - "const int lane2" + "float16x8_t a" ], "return_type": { - "value": "int8x16_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.16B" - }, - "b": { - "register": "Vn.8B" - }, - "lane1": { - "minimum": 0, - "maximum": 15 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Vn.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_u16", + "name": "vcvtmq_u32_f32", "arguments": [ - "uint16x8_t a", - "const int lane1", - "uint16x4_t b", - "const int lane2" + "float32x4_t a" ], "return_type": { - "value": "uint16x8_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8H" - }, - "b": { - "register": "Vn.4H" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "register": "Vn.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_u32", + "name": "vcvtmq_u64_f64", "arguments": [ - "uint32x4_t a", - "const int lane1", - "uint32x2_t b", - "const int lane2" + "float64x2_t a" ], "return_type": { - "value": "uint32x4_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.2S" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Vn.2D" } }, "Architectures": [ @@ -16520,36 +22366,22 @@ ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_u64", + "name": "vcvtms_s32_f32", "arguments": [ - "uint64x2_t a", - "const int lane1", - "uint64x1_t b", - "const int lane2" + "float32_t a" ], "return_type": { - "value": "uint64x2_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.1D" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - 
"maximum": 0 + "register": "Sn" } }, "Architectures": [ @@ -16557,36 +22389,22 @@ ], "instructions": [ [ - "INS" + "FCVTMS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_lane_u8", + "name": "vcvtms_u32_f32", "arguments": [ - "uint8x16_t a", - "const int lane1", - "uint8x8_t b", - "const int lane2" + "float32_t a" ], "return_type": { - "value": "uint8x16_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.16B" - }, - "b": { - "register": "Vn.8B" - }, - "lane1": { - "minimum": 0, - "maximum": 15 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Sn" } }, "Architectures": [ @@ -16594,110 +22412,70 @@ ], "instructions": [ [ - "INS" + "FCVTMU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_f32", + "name": "vcvtn_s16_f16", "arguments": [ - "float32x4_t a", - "const int lane1", - "float32x4_t b", - "const int lane2" + "float16x4_t a" ], "return_type": { - "value": "float32x4_t" + "value": "int16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.4S" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "register": "Vn.4H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_f64", + "name": "vcvtn_s32_f32", "arguments": [ - "float64x2_t a", - "const int lane1", - "float64x2_t b", - "const int lane2" + "float32x2_t a" ], "return_type": { - "value": "float64x2_t" + "value": "int32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.2D" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Vn.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_p16", + "name": "vcvtn_s64_f64", "arguments": [ - "poly16x8_t a", - "const int lane1", - "poly16x8_t b", - "const int lane2" + 
"float64x1_t a" ], "return_type": { - "value": "poly16x8_t" + "value": "int64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8H" - }, - "b": { - "register": "Vn.8H" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Dn" } }, "Architectures": [ @@ -16705,36 +22483,22 @@ ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_p64", + "name": "vcvtn_u16_f16", "arguments": [ - "poly64x2_t a", - "const int lane1", - "poly64x2_t b", - "const int lane2" + "float16x4_t a" ], "return_type": { - "value": "poly64x2_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.2D" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Vn.4H" } }, "Architectures": [ @@ -16743,73 +22507,46 @@ ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_p8", + "name": "vcvtn_u32_f32", "arguments": [ - "poly8x16_t a", - "const int lane1", - "poly8x16_t b", - "const int lane2" + "float32x2_t a" ], "return_type": { - "value": "poly8x16_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.16B" - }, - "b": { - "register": "Vn.16B" - }, - "lane1": { - "minimum": 0, - "maximum": 15 - }, - "lane2": { - "minimum": 0, - "maximum": 15 + "register": "Vn.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_s16", + "name": "vcvtn_u64_f64", "arguments": [ - "int16x8_t a", - "const int lane1", - "int16x8_t b", - "const int lane2" + "float64x1_t a" ], "return_type": { - "value": "int16x8_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8H" - }, - "b": { - "register": "Vn.8H" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Dn" } }, 
"Architectures": [ @@ -16817,36 +22554,22 @@ ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_s32", + "name": "vcvtnd_s64_f64", "arguments": [ - "int32x4_t a", - "const int lane1", - "int32x4_t b", - "const int lane2" + "float64_t a" ], "return_type": { - "value": "int32x4_t" + "value": "int64_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.4S" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "register": "Dn" } }, "Architectures": [ @@ -16854,36 +22577,22 @@ ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_s64", + "name": "vcvtnd_u64_f64", "arguments": [ - "int64x2_t a", - "const int lane1", - "int64x2_t b", - "const int lane2" + "float64_t a" ], "return_type": { - "value": "int64x2_t" + "value": "uint64_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.2D" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Dn" } }, "Architectures": [ @@ -16891,36 +22600,22 @@ ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_s8", + "name": "vcvtnh_s16_f16", "arguments": [ - "int8x16_t a", - "const int lane1", - "int8x16_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "int8x16_t" + "value": "int16_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.16B" - }, - "b": { - "register": "Vn.16B" - }, - "lane1": { - "minimum": 0, - "maximum": 15 - }, - "lane2": { - "minimum": 0, - "maximum": 15 + "register": "Hn" } }, "Architectures": [ @@ -16928,73 +22623,46 @@ ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_u16", + "name": "vcvtnh_s32_f16", "arguments": [ - "uint16x8_t a", - "const int lane1", - "uint16x8_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": 
"uint16x8_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8H" - }, - "b": { - "register": "Vn.8H" - }, - "lane1": { - "minimum": 0, - "maximum": 7 - }, - "lane2": { - "minimum": 0, - "maximum": 7 + "register": "Hn" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_u32", + "name": "vcvtnh_s64_f16", "arguments": [ - "uint32x4_t a", - "const int lane1", - "uint32x4_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "uint32x4_t" + "value": "int64_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.4S" - }, - "lane1": { - "minimum": 0, - "maximum": 3 - }, - "lane2": { - "minimum": 0, - "maximum": 3 + "register": "Hn" } }, "Architectures": [ @@ -17002,36 +22670,22 @@ ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_u64", + "name": "vcvtnh_u16_f16", "arguments": [ - "uint64x2_t a", - "const int lane1", - "uint64x2_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "uint64x2_t" + "value": "uint16_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.2D" - }, - "lane1": { - "minimum": 0, - "maximum": 1 - }, - "lane2": { - "minimum": 0, - "maximum": 1 + "register": "Hn" } }, "Architectures": [ @@ -17039,454 +22693,422 @@ ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcopyq_laneq_u8", + "name": "vcvtnh_u32_f16", "arguments": [ - "uint8x16_t a", - "const int lane1", - "uint8x16_t b", - "const int lane2" + "float16_t a" ], "return_type": { - "value": "uint8x16_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.16B" - }, - "b": { - "register": "Vn.16B" - }, - "lane1": { - "minimum": 0, - "maximum": 15 - }, - "lane2": { - "minimum": 0, - "maximum": 15 + "register": "Hn" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - 
"INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_f32", + "name": "vcvtnh_u64_f16", "arguments": [ - "uint64_t a" + "float16_t a" ], "return_type": { - "value": "float32x2_t" + "value": "uint64_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Hn" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_f64", + "name": "vcvtnq_s16_f16", "arguments": [ - "uint64_t a" + "float16x8_t a" ], "return_type": { - "value": "float64x1_t" + "value": "int16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_p16", + "name": "vcvtnq_s32_f32", "arguments": [ - "uint64_t a" + "float32x4_t a" ], "return_type": { - "value": "poly16x4_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_p64", + "name": "vcvtnq_s64_f64", "arguments": [ - "uint64_t a" + "float64x2_t a" ], "return_type": { - "value": "poly64x1_t" + "value": "int64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.2D" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_p8", + "name": "vcvtnq_u16_f16", "arguments": [ - "uint64_t a" + "float16x8_t a" ], "return_type": { - "value": "poly8x8_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_s16", + "name": "vcvtnq_u32_f32", "arguments": [ - "uint64_t a" + "float32x4_t a" ], "return_type": { - "value": "int16x4_t" + "value": 
"uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_s32", + "name": "vcvtnq_u64_f64", "arguments": [ - "uint64_t a" + "float64x2_t a" ], "return_type": { - "value": "int32x2_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_s64", + "name": "vcvtns_s32_f32", "arguments": [ - "uint64_t a" + "float32_t a" ], "return_type": { - "value": "int64x1_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Sn" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_s8", + "name": "vcvtns_u32_f32", "arguments": [ - "uint64_t a" + "float32_t a" ], "return_type": { - "value": "int8x8_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Sn" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTNU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_u16", + "name": "vcvtp_s16_f16", "arguments": [ - "uint64_t a" + "float16x4_t a" ], "return_type": { - "value": "uint16x4_t" + "value": "int16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.4H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_u32", + "name": "vcvtp_s32_f32", "arguments": [ - "uint64_t a" + "float32x2_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "int32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.2S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTPS" ] ] }, { "SIMD_ISA": 
"Neon", - "name": "vcreate_u64", + "name": "vcvtp_s64_f64", "arguments": [ - "uint64_t a" + "float64x1_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "int64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Dn" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcreate_u8", + "name": "vcvtp_u16_f16", "arguments": [ - "uint64_t a" + "float16x4_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Xn" + "register": "Vn.4H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "INS" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_f32_f64", + "name": "vcvtp_u32_f32", "arguments": [ - "float64x2_t a" + "float32x2_t a" ], "return_type": { - "value": "float32x2_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2D" + "register": "Vn.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTN" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_f32_s32", + "name": "vcvtp_u64_f64", "arguments": [ - "int32x2_t a" + "float64x1_t a" ], "return_type": { - "value": "float32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Dn" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "SCVTF" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_f32_u32", + "name": "vcvtpd_s64_f64", "arguments": [ - "uint32x2_t a" + "float64_t a" ], "return_type": { - "value": "float32x2_t" + "value": "int64_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Dn" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "UCVTF" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_f64_f32", + "name": "vcvtpd_u64_f64", "arguments": [ - "float32x2_t a" + "float64_t a" ], "return_type": { - "value": "float64x2_t" + "value": "uint64_t" 
}, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Dn" } }, "Architectures": [ @@ -17494,22 +23116,22 @@ ], "instructions": [ [ - "FCVTL" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_f64_s64", + "name": "vcvtph_s16_f16", "arguments": [ - "int64x1_t a" + "float16_t a" ], "return_type": { - "value": "float64x1_t" + "value": "int16_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Hn" } }, "Architectures": [ @@ -17517,49 +23139,46 @@ ], "instructions": [ [ - "SCVTF" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_f64_u64", + "name": "vcvtph_s32_f16", "arguments": [ - "uint64x1_t a" + "float16_t a" ], "return_type": { - "value": "float64x1_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Hn" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "UCVTF" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_high_f32_f64", + "name": "vcvtph_s64_f16", "arguments": [ - "float32x2_t r", - "float64x2_t a" + "float16_t a" ], "return_type": { - "value": "float32x4_t" + "value": "int64_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2D" - }, - "r": { - "register": "Vd.2S" + "register": "Hn" } }, "Architectures": [ @@ -17567,22 +23186,22 @@ ], "instructions": [ [ - "FCVTN2" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_high_f64_f32", + "name": "vcvtph_u16_f16", "arguments": [ - "float32x4_t a" + "float16_t a" ], "return_type": { - "value": "float64x2_t" + "value": "uint16_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.4S" + "register": "Hn" } }, "Architectures": [ @@ -17590,231 +23209,188 @@ ], "instructions": [ [ - "FCVTL2" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_n_f32_s32", + "name": "vcvtph_u32_f16", "arguments": [ - "int32x2_t a", - "const int n" + "float16_t a" ], "return_type": { - "value": "float32x2_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" - }, - "n": { - 
"minimum": 1, - "maximum": 32 + "register": "Hn" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "SCVTF" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_n_f32_u32", + "name": "vcvtph_u64_f16", "arguments": [ - "uint32x2_t a", - "const int n" + "float16_t a" ], "return_type": { - "value": "float32x2_t" + "value": "uint64_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" - }, - "n": { - "minimum": 1, - "maximum": 32 + "register": "Hn" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "UCVTF" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_n_f64_s64", + "name": "vcvtpq_s16_f16", "arguments": [ - "int64x1_t a", - "const int n" + "float16x8_t a" ], "return_type": { - "value": "float64x1_t" + "value": "int16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" - }, - "n": { - "minimum": 1, - "maximum": 64 + "register": "Vn.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "SCVTF" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_n_f64_u64", + "name": "vcvtpq_s32_f32", "arguments": [ - "uint64x1_t a", - "const int n" + "float32x4_t a" ], "return_type": { - "value": "float64x1_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" - }, - "n": { - "minimum": 1, - "maximum": 64 + "register": "Vn.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "UCVTF" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_n_s32_f32", + "name": "vcvtpq_s64_f64", "arguments": [ - "float32x2_t a", - "const int n" + "float64x2_t a" ], "return_type": { - "value": "int32x2_t" + "value": "int64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" - }, - "n": { - "minimum": 1, - "maximum": 32 + "register": "Vn.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "FCVTZS" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_n_s64_f64", + "name": "vcvtpq_u16_f16", "arguments": [ - "float64x1_t a", - "const int n" + 
"float16x8_t a" ], "return_type": { - "value": "int64x1_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" - }, - "n": { - "minimum": 1, - "maximum": 64 + "register": "Vn.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTZS" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_n_u32_f32", + "name": "vcvtpq_u32_f32", "arguments": [ - "float32x2_t a", - "const int n" + "float32x4_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" - }, - "n": { - "minimum": 1, - "maximum": 32 + "register": "Vn.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "FCVTZU" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_n_u64_f64", + "name": "vcvtpq_u64_f64", "arguments": [ - "float64x1_t a", - "const int n" + "float64x2_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" - }, - "n": { - "minimum": 1, - "maximum": 64 + "register": "Vn.2D" } }, "Architectures": [ @@ -17822,47 +23398,45 @@ ], "instructions": [ [ - "FCVTZU" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_s32_f32", + "name": "vcvtps_s32_f32", "arguments": [ - "float32x2_t a" + "float32_t a" ], "return_type": { - "value": "int32x2_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Sn" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "FCVTZS" + "FCVTPS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_s64_f64", + "name": "vcvtps_u32_f32", "arguments": [ - "float64x1_t a" + "float32_t a" ], "return_type": { - "value": "int64x1_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Sn" } }, "Architectures": [ @@ -17870,141 +23444,143 @@ ], "instructions": [ [ - "FCVTZS" + "FCVTPU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_u32_f32", + "name": "vcvtq_f16_s16", "arguments": 
[ - "float32x2_t a" + "int16x8_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Vn.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "FCVTZU" + "SCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvt_u64_f64", + "name": "vcvtq_f16_u16", "arguments": [ - "float64x1_t a" + "uint16x8_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTZU" + "UCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvta_s32_f32", + "name": "vcvtq_f32_s32", "arguments": [ - "float32x2_t a" + "int32x4_t a" ], "return_type": { - "value": "int32x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Vn.4S" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "FCVTAS" + "SCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvta_s64_f64", + "name": "vcvtq_f32_u32", "arguments": [ - "float64x1_t a" + "uint32x4_t a" ], "return_type": { - "value": "int64x1_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.4S" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTAS" + "UCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvta_u32_f32", + "name": "vcvtq_f64_s64", "arguments": [ - "float32x2_t a" + "int64x2_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Vn.2D" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "FCVTAU" + "SCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvta_u64_f64", + "name": "vcvtq_f64_u64", "arguments": [ - "float64x1_t a" + "uint64x2_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" 
+ "register": "Vn.2D" } }, "Architectures": [ @@ -18012,139 +23588,173 @@ ], "instructions": [ [ - "FCVTAU" + "UCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtad_s64_f64", + "name": "vcvtq_n_f16_s16", "arguments": [ - "float64_t a" + "int16x8_t a", + "const int n" ], "return_type": { - "value": "int64_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTAS" + "SCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtad_u64_f64", + "name": "vcvtq_n_f16_u16", "arguments": [ - "float64_t a" + "uint16x8_t a", + "const int n" ], "return_type": { - "value": "uint64_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTAU" + "UCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtaq_s32_f32", + "name": "vcvtq_n_f32_s32", "arguments": [ - "float32x4_t a" + "int32x4_t a", + "const int n" ], "return_type": { - "value": "int32x4_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "FCVTAS" + "SCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtaq_s64_f64", + "name": "vcvtq_n_f32_u32", "arguments": [ - "float64x2_t a" + "uint32x4_t a", + "const int n" ], "return_type": { - "value": "int64x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2D" + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTAS" + "UCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtaq_u32_f32", + "name": "vcvtq_n_f64_s64", "arguments": [ - "float32x4_t a" + "int64x2_t a", + "const int n" ], "return_type": { - "value": 
"uint32x4_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.4S" + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "FCVTAU" + "SCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtaq_u64_f64", + "name": "vcvtq_n_f64_u64", "arguments": [ - "float64x2_t a" + "uint64x2_t a", + "const int n" ], "return_type": { - "value": "uint64x2_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 } }, "Architectures": [ @@ -18152,68 +23762,86 @@ ], "instructions": [ [ - "FCVTAU" + "UCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtas_s32_f32", + "name": "vcvtq_n_s16_f16", "arguments": [ - "float32_t a" + "float16x8_t a", + "const int n" ], "return_type": { - "value": "int32_t" + "value": "int16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Sn" + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTAS" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtas_u32_f32", + "name": "vcvtq_n_s32_f32", "arguments": [ - "float32_t a" + "float32x4_t a", + "const int n" ], "return_type": { - "value": "uint32_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Sn" + "register": "Vn.4S" + }, + "n": { + "minimum": 1, + "maximum": 32 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTAU" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtd_f64_s64", + "name": "vcvtq_n_s64_f64", "arguments": [ - "int64_t a" + "float64x2_t a", + "const int n" ], "return_type": { - "value": "float64_t" + "value": "int64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.2D" + }, + "n": { + "minimum": 1, + "maximum": 64 } }, "Architectures": [ @@ -18221,74 +23849,82 @@ ], "instructions": [ [ - "SCVTF" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": 
"vcvtd_f64_u64", + "name": "vcvtq_n_u16_f16", "arguments": [ - "uint64_t a" + "float16x8_t a", + "const int n" ], "return_type": { - "value": "float64_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.8H" + }, + "n": { + "minimum": 1, + "maximum": 16 } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "UCVTF" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtd_n_f64_s64", + "name": "vcvtq_n_u32_f32", "arguments": [ - "int64_t a", + "float32x4_t a", "const int n" ], "return_type": { - "value": "float64_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.4S" }, "n": { "minimum": 1, - "maximum": 64 + "maximum": 32 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "SCVTF" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtd_n_f64_u64", + "name": "vcvtq_n_u64_f64", "arguments": [ - "uint64_t a", + "float64x2_t a", "const int n" ], "return_type": { - "value": "float64_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.2D" }, "n": { "minimum": 1, @@ -18300,30 +23936,26 @@ ], "instructions": [ [ - "UCVTF" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtd_n_s64_f64", + "name": "vcvtq_s16_f16", "arguments": [ - "float64_t a", - "const int n" + "float16x8_t a" ], "return_type": { - "value": "int64_t" + "value": "int16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" - }, - "n": { - "minimum": 1, - "maximum": 64 + "register": "Vn.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ @@ -18334,44 +23966,41 @@ }, { "SIMD_ISA": "Neon", - "name": "vcvtd_n_u64_f64", + "name": "vcvtq_s32_f32", "arguments": [ - "float64_t a", - "const int n" + "float32x4_t a" ], "return_type": { - "value": "uint64_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" - }, - "n": { - "minimum": 1, - "maximum": 64 + "register": "Vn.4S" } }, "Architectures": 
[ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTZU" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtd_s64_f64", + "name": "vcvtq_s64_f64", "arguments": [ - "float64_t a" + "float64x2_t a" ], "return_type": { - "value": "int64_t" + "value": "int64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.2D" } }, "Architectures": [ @@ -18385,63 +24014,65 @@ }, { "SIMD_ISA": "Neon", - "name": "vcvtd_u64_f64", + "name": "vcvtq_u16_f16", "arguments": [ - "float64_t a" + "float16x8_t a" ], "return_type": { - "value": "uint64_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.8H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTZU" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtm_s32_f32", + "name": "vcvtq_u32_f32", "arguments": [ - "float32x2_t a" + "float32x4_t a" ], "return_type": { - "value": "int32x2_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Vn.4S" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "FCVTMS" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtm_s64_f64", + "name": "vcvtq_u64_f64", "arguments": [ - "float64x1_t a" + "float64x2_t a" ], "return_type": { - "value": "int64x1_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.2D" } }, "Architectures": [ @@ -18449,46 +24080,45 @@ ], "instructions": [ [ - "FCVTMS" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtm_u32_f32", + "name": "vcvts_f32_s32", "arguments": [ - "float32x2_t a" + "int32_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "float32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Sn" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "FCVTMU" + "SCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtm_u64_f64", + "name": "vcvts_f32_u32", "arguments": [ - "float64x1_t a" + "uint32_t a" ], 
"return_type": { - "value": "uint64x1_t" + "value": "float32_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Sn" } }, "Architectures": [ @@ -18496,22 +24126,27 @@ ], "instructions": [ [ - "FCVTMU" + "UCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtmd_s64_f64", + "name": "vcvts_n_f32_s32", "arguments": [ - "float64_t a" + "int32_t a", + "const int n" ], "return_type": { - "value": "int64_t" + "value": "float32_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 32 } }, "Architectures": [ @@ -18519,22 +24154,27 @@ ], "instructions": [ [ - "FCVTMS" + "SCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtmd_u64_f64", + "name": "vcvts_n_f32_u32", "arguments": [ - "float64_t a" + "uint32_t a", + "const int n" ], "return_type": { - "value": "uint64_t" + "value": "float32_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 32 } }, "Architectures": [ @@ -18542,46 +24182,55 @@ ], "instructions": [ [ - "FCVTMU" + "UCVTF" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtmq_s32_f32", + "name": "vcvts_n_s32_f32", "arguments": [ - "float32x4_t a" + "float32_t a", + "const int n" ], "return_type": { - "value": "int32x4_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.4S" + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 32 } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "FCVTMS" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtmq_s64_f64", + "name": "vcvts_n_u32_f32", "arguments": [ - "float64x2_t a" + "float32_t a", + "const int n" ], "return_type": { - "value": "int64x2_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2D" + "register": "Sn" + }, + "n": { + "minimum": 1, + "maximum": 32 } }, "Architectures": [ @@ -18589,46 +24238,45 @@ ], "instructions": [ [ - "FCVTMS" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": 
"vcvtmq_u32_f32", + "name": "vcvts_s32_f32", "arguments": [ - "float32x4_t a" + "float32_t a" ], "return_type": { - "value": "uint32x4_t" + "value": "int32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.4S" + "register": "Sn" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "FCVTMU" + "FCVTZS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtmq_u64_f64", + "name": "vcvts_u32_f32", "arguments": [ - "float64x2_t a" + "float32_t a" ], "return_type": { - "value": "uint64x2_t" + "value": "uint32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2D" + "register": "Sn" } }, "Architectures": [ @@ -18636,22 +24284,22 @@ ], "instructions": [ [ - "FCVTMU" + "FCVTZU" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtms_s32_f32", + "name": "vcvtx_f32_f64", "arguments": [ - "float32_t a" + "float64x2_t a" ], "return_type": { - "value": "int32_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Sn" + "register": "Vn.2D" } }, "Architectures": [ @@ -18659,22 +24307,26 @@ ], "instructions": [ [ - "FCVTMS" + "FCVTXN" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtms_u32_f32", + "name": "vcvtx_high_f32_f64", "arguments": [ - "float32_t a" + "float32x2_t r", + "float64x2_t a" ], "return_type": { - "value": "uint32_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Sn" + "register": "Vn.2D" + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ @@ -18682,46 +24334,49 @@ ], "instructions": [ [ - "FCVTMU" + "FCVTXN2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtn_s32_f32", + "name": "vcvtxd_f32_f64", "arguments": [ - "float32x2_t a" + "float64_t a" ], "return_type": { - "value": "int32x2_t" + "value": "float32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Dn" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "FCVTNS" + "FCVTXN" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtn_s64_f64", + "name": "vdiv_f16", "arguments": [ - "float64x1_t a" + "float16x4_t a", + 
"float16x4_t b" ], "return_type": { - "value": "int64x1_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" } }, "Architectures": [ @@ -18729,46 +24384,53 @@ ], "instructions": [ [ - "FCVTNS" + "FDIV" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtn_u32_f32", + "name": "vdiv_f32", "arguments": [ - "float32x2_t a" + "float32x2_t a", + "float32x2_t b" ], "return_type": { - "value": "uint32x2_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": { "register": "Vn.2S" + }, + "b": { + "register": "Vm.2S" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "FCVTNU" + "FDIV" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtn_u64_f64", + "name": "vdiv_f64", "arguments": [ - "float64x1_t a" + "float64x1_t a", + "float64x1_t b" ], "return_type": { - "value": "uint64x1_t" + "value": "float64x1_t" }, "Arguments_Preparation": { "a": { "register": "Dn" + }, + "b": { + "register": "Dm" } }, "Architectures": [ @@ -18776,45 +24438,54 @@ ], "instructions": [ [ - "FCVTNU" + "FDIV" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtnd_s64_f64", + "name": "vdivh_f16", "arguments": [ - "float64_t a" + "float16_t a", + "float16_t b" ], "return_type": { - "value": "int64_t" + "value": "float16_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Hn" + }, + "b": { + "register": "Hm" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTNS" + "FDIV" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtnd_u64_f64", + "name": "vdivq_f16", "arguments": [ - "float64_t a" + "float16x8_t a", + "float16x8_t b" ], "return_type": { - "value": "uint64_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" } }, "Architectures": [ @@ -18822,46 +24493,53 @@ ], "instructions": [ [ - "FCVTNU" + "FDIV" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtnq_s32_f32", + "name": "vdivq_f32", "arguments": [ - 
"float32x4_t a" + "float32x4_t a", + "float32x4_t b" ], "return_type": { - "value": "int32x4_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { "register": "Vn.4S" + }, + "b": { + "register": "Vm.4S" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "FCVTNS" + "FDIV" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtnq_s64_f64", + "name": "vdivq_f64", "arguments": [ - "float64x2_t a" + "float64x2_t a", + "float64x2_t b" ], "return_type": { - "value": "int64x2_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { "register": "Vn.2D" + }, + "b": { + "register": "Vm.2D" } }, "Architectures": [ @@ -18869,22 +24547,35 @@ ], "instructions": [ [ - "FCVTNS" + "FDIV" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtnq_u32_f32", + "name": "vdot_lane_s32", "arguments": [ - "float32x4_t a" + "int32x2_t r", + "int8x8_t a", + "int8x8_t b", + "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "int32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.4S" + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ @@ -18893,45 +24584,72 @@ ], "instructions": [ [ - "FCVTNU" + "SDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtnq_u64_f64", + "name": "vdot_lane_u32", "arguments": [ - "float64x2_t a" + "uint32x2_t r", + "uint8x8_t a", + "uint8x8_t b", + "const int lane" ], "return_type": { - "value": "uint64x2_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2D" + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTNU" + "UDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtns_s32_f32", + "name": "vdot_laneq_s32", "arguments": [ - "float32_t a" + "int32x2_t r", + "int8x8_t a", + "int8x16_t b", + "const int lane" ], "return_type": { - 
"value": "int32_t" + "value": "int32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Sn" + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ @@ -18939,22 +24657,35 @@ ], "instructions": [ [ - "FCVTNS" + "SDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtns_u32_f32", + "name": "vdot_laneq_u32", "arguments": [ - "float32_t a" + "uint32x2_t r", + "uint8x8_t a", + "uint8x16_t b", + "const int lane" ], "return_type": { - "value": "uint32_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Sn" + "register": "Vn.8B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ @@ -18962,22 +24693,30 @@ ], "instructions": [ [ - "FCVTNU" + "UDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtp_s32_f32", + "name": "vdot_s32", "arguments": [ - "float32x2_t a" + "int32x2_t r", + "int8x8_t a", + "int8x8_t b" ], "return_type": { "value": "int32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ @@ -18986,45 +24725,67 @@ ], "instructions": [ [ - "FCVTPS" + "SDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtp_s64_f64", + "name": "vdot_u32", "arguments": [ - "float64x1_t a" + "uint32x2_t r", + "uint8x8_t a", + "uint8x8_t b" ], "return_type": { - "value": "int64x1_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTPS" + "UDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtp_u32_f32", + "name": "vdotq_lane_s32", "arguments": [ - "float32x2_t a" + "int32x4_t r", + "int8x16_t a", + "int8x8_t b", + "const int lane" ], "return_type": { - 
"value": "uint32x2_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2S" + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" } }, "Architectures": [ @@ -19033,45 +24794,72 @@ ], "instructions": [ [ - "FCVTPU" + "SDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtp_u64_f64", + "name": "vdotq_lane_u32", "arguments": [ - "float64x1_t a" + "uint32x4_t r", + "uint8x16_t a", + "uint8x8_t b", + "const int lane" ], "return_type": { - "value": "uint64x1_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTPU" + "UDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtpd_s64_f64", + "name": "vdotq_laneq_s32", "arguments": [ - "float64_t a" + "int32x4_t r", + "int8x16_t a", + "int8x16_t b", + "const int lane" ], "return_type": { - "value": "int64_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" } }, "Architectures": [ @@ -19079,22 +24867,35 @@ ], "instructions": [ [ - "FCVTPS" + "SDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtpd_u64_f64", + "name": "vdotq_laneq_u32", "arguments": [ - "float64_t a" + "uint32x4_t r", + "uint8x16_t a", + "uint8x16_t b", + "const int lane" ], "return_type": { - "value": "uint64_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Dn" + "register": "Vn.16B" + }, + "b": { + "register": "Vm.4B" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.4S" } }, "Architectures": [ @@ -19102,22 +24903,30 @@ ], "instructions": [ [ - "FCVTPU" + "UDOT" ] ] }, { "SIMD_ISA": "Neon", - 
"name": "vcvtpq_s32_f32", + "name": "vdotq_s32", "arguments": [ - "float32x4_t a" + "int32x4_t r", + "int8x16_t a", + "int8x16_t b" ], "return_type": { "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.4S" + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "r": { + "register": "Vd.4S" } }, "Architectures": [ @@ -19126,92 +24935,119 @@ ], "instructions": [ [ - "FCVTPS" + "SDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtpq_s64_f64", + "name": "vdotq_u32", "arguments": [ - "float64x2_t a" + "uint32x4_t r", + "uint8x16_t a", + "uint8x16_t b" ], "return_type": { - "value": "int64x2_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.2D" + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" + }, + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FCVTPS" + "UDOT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtpq_u32_f32", + "name": "vdup_lane_f16", "arguments": [ - "float32x4_t a" + "float16x4_t vec", + "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "float16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "FCVTPU" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtpq_u64_f64", + "name": "vdup_lane_f32", "arguments": [ - "float64x2_t a" + "float32x2_t vec", + "const int lane" ], "return_type": { - "value": "uint64x2_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTPU" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtps_s32_f32", + "name": "vdup_lane_f64", "arguments": [ - "float32_t a" + "float64x1_t vec", + "const int lane" ], "return_type": { - "value": "int32_t" 
+ "value": "float64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" } }, "Architectures": [ @@ -19219,70 +25055,86 @@ ], "instructions": [ [ - "FCVTPS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtps_u32_f32", + "name": "vdup_lane_p16", "arguments": [ - "float32_t a" + "poly16x4_t vec", + "const int lane" ], "return_type": { - "value": "uint32_t" + "value": "poly16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTPU" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_f32_s32", + "name": "vdup_lane_p64", "arguments": [ - "int32x4_t a" + "poly64x1_t vec", + "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "poly64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + "register": "Vn.1D" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "SCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_f32_u32", + "name": "vdup_lane_p8", "arguments": [ - "uint32x4_t a" + "poly8x8_t vec", + "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "poly8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" } }, "Architectures": [ @@ -19292,73 +25144,87 @@ ], "instructions": [ [ - "UCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_f64_s64", + "name": "vdup_lane_s16", "arguments": [ - "int64x2_t a" + "int16x4_t vec", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "int16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4H" } }, "Architectures": [ + 
"v7", + "A32", "A64" ], "instructions": [ [ - "SCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_f64_u64", + "name": "vdup_lane_s32", "arguments": [ - "uint64x2_t a" + "int32x2_t vec", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "int32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "UCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_n_f32_s32", + "name": "vdup_lane_s64", "arguments": [ - "int32x4_t a", - "const int n" + "int64x1_t vec", + "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "int64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 0 }, - "n": { - "minimum": 1, - "maximum": 32 + "vec": { + "register": "Vn.1D" } }, "Architectures": [ @@ -19368,27 +25234,27 @@ ], "instructions": [ [ - "SCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_n_f32_u32", + "name": "vdup_lane_s8", "arguments": [ - "uint32x4_t a", - "const int n" + "int8x8_t vec", + "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "int8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 7 }, - "n": { - "minimum": 1, - "maximum": 32 + "vec": { + "register": "Vn.8B" } }, "Architectures": [ @@ -19398,83 +25264,87 @@ ], "instructions": [ [ - "UCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_n_f64_s64", + "name": "vdup_lane_u16", "arguments": [ - "int64x2_t a", - "const int n" + "uint16x4_t vec", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 3 }, - "n": { - "minimum": 1, - "maximum": 64 + "vec": { + "register": "Vn.4H" } }, "Architectures": [ + "v7", + "A32", 
"A64" ], "instructions": [ [ - "SCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_n_f64_u64", + "name": "vdup_lane_u32", "arguments": [ - "uint64x2_t a", - "const int n" + "uint32x2_t vec", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 1 }, - "n": { - "minimum": 1, - "maximum": 64 + "vec": { + "register": "Vn.2S" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "UCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_n_s32_f32", + "name": "vdup_lane_u64", "arguments": [ - "float32x4_t a", - "const int n" + "uint64x1_t vec", + "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 0 }, - "n": { - "minimum": 1, - "maximum": 32 + "vec": { + "register": "Vn.1D" } }, "Architectures": [ @@ -19484,85 +25354,85 @@ ], "instructions": [ [ - "FCVTZS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_n_s64_f64", + "name": "vdup_lane_u8", "arguments": [ - "float64x2_t a", - "const int n" + "uint8x8_t vec", + "const int lane" ], "return_type": { - "value": "int64x2_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 7 }, - "n": { - "minimum": 1, - "maximum": 64 + "vec": { + "register": "Vn.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTZS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_n_u32_f32", + "name": "vdup_laneq_f16", "arguments": [ - "float32x4_t a", - "const int n" + "float16x8_t vec", + "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "float16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 7 }, - "n": { - "minimum": 1, - "maximum": 32 + "vec": { + "register": 
"Vn.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "FCVTZU" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_n_u64_f64", + "name": "vdup_laneq_f32", "arguments": [ - "float64x2_t a", - "const int n" + "float32x4_t vec", + "const int lane" ], "return_type": { - "value": "uint64x2_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 3 }, - "n": { - "minimum": 1, - "maximum": 64 + "vec": { + "register": "Vn.4S" } }, "Architectures": [ @@ -19570,47 +25440,55 @@ ], "instructions": [ [ - "FCVTZU" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_s32_f32", + "name": "vdup_laneq_f64", "arguments": [ - "float32x4_t a" + "float64x2_t vec", + "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "float64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "FCVTZS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_s64_f64", + "name": "vdup_laneq_p16", "arguments": [ - "float64x2_t a" + "poly16x8_t vec", + "const int lane" ], "return_type": { - "value": "int64x2_t" + "value": "poly16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" } }, "Architectures": [ @@ -19618,47 +25496,55 @@ ], "instructions": [ [ - "FCVTZS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtq_u32_f32", + "name": "vdup_laneq_p64", "arguments": [ - "float32x4_t a" + "poly64x2_t vec", + "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "poly64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "FCVTZU" + "DUP" ] ] }, { 
"SIMD_ISA": "Neon", - "name": "vcvtq_u64_f64", + "name": "vdup_laneq_p8", "arguments": [ - "float64x2_t a" + "poly8x16_t vec", + "const int lane" ], "return_type": { - "value": "uint64x2_t" + "value": "poly8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" } }, "Architectures": [ @@ -19666,22 +25552,27 @@ ], "instructions": [ [ - "FCVTZU" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvts_f32_s32", + "name": "vdup_laneq_s16", "arguments": [ - "int32_t a" + "int16x8_t vec", + "const int lane" ], "return_type": { - "value": "float32_t" + "value": "int16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" } }, "Architectures": [ @@ -19689,22 +25580,27 @@ ], "instructions": [ [ - "SCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvts_f32_u32", + "name": "vdup_laneq_s32", "arguments": [ - "uint32_t a" + "int32x4_t vec", + "const int lane" ], "return_type": { - "value": "float32_t" + "value": "int32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" } }, "Architectures": [ @@ -19712,27 +25608,27 @@ ], "instructions": [ [ - "UCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvts_n_f32_s32", + "name": "vdup_laneq_s64", "arguments": [ - "int32_t a", - "const int n" + "int64x2_t vec", + "const int lane" ], "return_type": { - "value": "float32_t" + "value": "int64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 1 }, - "n": { - "minimum": 1, - "maximum": 32 + "vec": { + "register": "Vn.2D" } }, "Architectures": [ @@ -19740,27 +25636,27 @@ ], "instructions": [ [ - "SCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvts_n_f32_u32", + "name": "vdup_laneq_s8", "arguments": [ - "uint32_t a", - "const int n" + "int8x16_t vec", 
+ "const int lane" ], "return_type": { - "value": "float32_t" + "value": "int8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 15 }, - "n": { - "minimum": 1, - "maximum": 32 + "vec": { + "register": "Vn.16B" } }, "Architectures": [ @@ -19768,27 +25664,27 @@ ], "instructions": [ [ - "UCVTF" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvts_n_s32_f32", + "name": "vdup_laneq_u16", "arguments": [ - "float32_t a", - "const int n" + "uint16x8_t vec", + "const int lane" ], "return_type": { - "value": "int32_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 7 }, - "n": { - "minimum": 1, - "maximum": 32 + "vec": { + "register": "Vn.8H" } }, "Architectures": [ @@ -19796,27 +25692,27 @@ ], "instructions": [ [ - "FCVTZS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvts_n_u32_f32", + "name": "vdup_laneq_u32", "arguments": [ - "float32_t a", - "const int n" + "uint32x4_t vec", + "const int lane" ], "return_type": { - "value": "uint32_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 3 }, - "n": { - "minimum": 1, - "maximum": 32 + "vec": { + "register": "Vn.4S" } }, "Architectures": [ @@ -19824,22 +25720,27 @@ ], "instructions": [ [ - "FCVTZU" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvts_s32_f32", + "name": "vdup_laneq_u64", "arguments": [ - "float32_t a" + "uint64x2_t vec", + "const int lane" ], "return_type": { - "value": "int32_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" } }, "Architectures": [ @@ -19847,22 +25748,27 @@ ], "instructions": [ [ - "FCVTZS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvts_u32_f32", + "name": "vdup_laneq_u8", "arguments": [ - "float32_t a" + "uint8x16_t vec", + "const int lane" ], "return_type": { - 
"value": "uint32_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Sn" + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" } }, "Architectures": [ @@ -19870,72 +25776,72 @@ ], "instructions": [ [ - "FCVTZU" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtx_f32_f64", + "name": "vdup_n_f16", "arguments": [ - "float64x2_t a" + "float16_t value" ], "return_type": { - "value": "float32x2_t" + "value": "float16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTXN" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtx_high_f32_f64", + "name": "vdup_n_f32", "arguments": [ - "float32x2_t r", - "float64x2_t a" + "float32_t value" ], "return_type": { - "value": "float32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" - }, - "r": { - "register": "Vd.2S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FCVTXN2" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vcvtxd_f32_f64", + "name": "vdup_n_f64", "arguments": [ - "float64_t a" + "float64_t value" ], "return_type": { - "value": "float32_t" + "value": "float64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Dn" + "value": { + "register": "rn" } }, "Architectures": [ @@ -19943,427 +25849,329 @@ ], "instructions": [ [ - "FCVTXN" + "INS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdiv_f32", + "name": "vdup_n_p16", "arguments": [ - "float32x2_t a", - "float32x2_t b" + "poly16_t value" ], "return_type": { - "value": "float32x2_t" + "value": "poly16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2S" - }, - "b": { - "register": "Vm.2S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FDIV" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdiv_f64", + "name": "vdup_n_p64", 
"arguments": [ - "float64x1_t a", - "float64x1_t b" + "poly64_t value" ], "return_type": { - "value": "float64x1_t" + "value": "poly64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Dn" - }, - "b": { - "register": "Dm" + "value": { + "register": "rn" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FDIV" + "INS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdivq_f32", + "name": "vdup_n_p8", "arguments": [ - "float32x4_t a", - "float32x4_t b" + "poly8_t value" ], "return_type": { - "value": "float32x4_t" + "value": "poly8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.4S" - }, - "b": { - "register": "Vm.4S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FDIV" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdivq_f64", + "name": "vdup_n_s16", "arguments": [ - "float64x2_t a", - "float64x2_t b" + "int16_t value" ], "return_type": { - "value": "float64x2_t" + "value": "int16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.2D" - }, - "b": { - "register": "Vm.2D" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "FDIV" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdot_lane_s32", + "name": "vdup_n_s32", "arguments": [ - "int32x2_t r", - "int8x8_t a", - "int8x8_t b", - "const int lane" + "int32_t value" ], "return_type": { "value": "int32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.8B" - }, - "b": { - "register": "Vm.4B" - }, - "lane": { - "minimum": 0, - "maximum": 1 - }, - "r": { - "register": "Vd.2S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "SDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdot_lane_u32", + "name": "vdup_n_s64", "arguments": [ - "uint32x2_t r", - "uint8x8_t a", - "uint8x8_t b", - "const int lane" + "int64_t value" ], "return_type": { - "value": "uint32x2_t" + "value": "int64x1_t" }, "Arguments_Preparation": 
{ - "a": { - "register": "Vn.8B" - }, - "b": { - "register": "Vm.4B" - }, - "lane": { - "minimum": 0, - "maximum": 1 - }, - "r": { - "register": "Vd.2S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "UDOT" + "INS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdot_laneq_s32", + "name": "vdup_n_s8", "arguments": [ - "int32x2_t r", - "int8x8_t a", - "int8x16_t b", - "const int lane" + "int8_t value" ], "return_type": { - "value": "int32x2_t" + "value": "int8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.8B" - }, - "b": { - "register": "Vm.4B" - }, - "lane": { - "minimum": 0, - "maximum": 3 - }, - "r": { - "register": "Vd.2S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "SDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdot_laneq_u32", + "name": "vdup_n_u16", "arguments": [ - "uint32x2_t r", - "uint8x8_t a", - "uint8x16_t b", - "const int lane" + "uint16_t value" ], "return_type": { - "value": "uint32x2_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.8B" - }, - "b": { - "register": "Vm.4B" - }, - "lane": { - "minimum": 0, - "maximum": 3 - }, - "r": { - "register": "Vd.2S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "UDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdot_s32", + "name": "vdup_n_u32", "arguments": [ - "int32x2_t r", - "int8x8_t a", - "int8x8_t b" + "uint32_t value" ], "return_type": { - "value": "int32x2_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.8B" - }, - "b": { - "register": "Vm.8B" - }, - "r": { - "register": "Vd.2S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "SDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdot_u32", + "name": "vdup_n_u64", "arguments": [ - "uint32x2_t r", - "uint8x8_t a", - "uint8x8_t b" + "uint64_t 
value" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.8B" - }, - "b": { - "register": "Vm.8B" - }, - "r": { - "register": "Vd.2S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "UDOT" + "INS" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdotq_lane_s32", + "name": "vdup_n_u8", "arguments": [ - "int32x4_t r", - "int8x16_t a", - "int8x8_t b", - "const int lane" + "uint8_t value" ], "return_type": { - "value": "int32x4_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.4B" - }, - "lane": { - "minimum": 0, - "maximum": 1 - }, - "r": { - "register": "Vd.4S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "SDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdotq_lane_u32", + "name": "vdupb_lane_p8", "arguments": [ - "uint32x4_t r", - "uint8x16_t a", - "uint8x8_t b", + "poly8x8_t vec", "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "poly8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.4B" - }, "lane": { "minimum": 0, - "maximum": 1 + "maximum": 7 }, - "r": { - "register": "Vd.4S" + "vec": { + "register": "Vn.8B" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "UDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdotq_laneq_s32", + "name": "vdupb_lane_s8", "arguments": [ - "int32x4_t r", - "int8x16_t a", - "int8x16_t b", + "int8x8_t vec", "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "int8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.4B" - }, "lane": { "minimum": 0, - "maximum": 3 + "maximum": 7 }, - "r": { - "register": "Vd.4S" + "vec": { + "register": "Vn.8B" } }, "Architectures": [ @@ -20371,35 +26179,27 @@ ], "instructions": [ [ - "SDOT" + "DUP" ] ] }, { 
"SIMD_ISA": "Neon", - "name": "vdotq_laneq_u32", + "name": "vdupb_lane_u8", "arguments": [ - "uint32x4_t r", - "uint8x16_t a", - "uint8x16_t b", + "uint8x8_t vec", "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "uint8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.4B" - }, "lane": { "minimum": 0, - "maximum": 3 + "maximum": 7 }, - "r": { - "register": "Vd.4S" + "vec": { + "register": "Vn.8B" } }, "Architectures": [ @@ -20407,96 +26207,86 @@ ], "instructions": [ [ - "UDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdotq_s32", + "name": "vdupb_laneq_p8", "arguments": [ - "int32x4_t r", - "int8x16_t a", - "int8x16_t b" + "poly8x16_t vec", + "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "poly8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.16B" + "lane": { + "minimum": 0, + "maximum": 15 }, - "r": { - "register": "Vd.4S" + "vec": { + "register": "Vn.16B" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "SDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdotq_u32", + "name": "vdupb_laneq_s8", "arguments": [ - "uint32x4_t r", - "uint8x16_t a", - "uint8x16_t b" + "int8x16_t vec", + "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "int8_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.16B" + "lane": { + "minimum": 0, + "maximum": 15 }, - "r": { - "register": "Vd.4S" + "vec": { + "register": "Vn.16B" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "UDOT" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_f32", + "name": "vdupb_laneq_u8", "arguments": [ - "float32x2_t vec", + "uint8x16_t vec", "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "uint8_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 1 + "maximum": 15 }, "vec": { - "register": "Vn.2S" + "register": 
"Vn.16B" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20507,13 +26297,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_f64", + "name": "vdupd_lane_f64", "arguments": [ "float64x1_t vec", "const int lane" ], "return_type": { - "value": "float64x1_t" + "value": "float64_t" }, "Arguments_Preparation": { "lane": { @@ -20535,26 +26325,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_p16", + "name": "vdupd_lane_s64", "arguments": [ - "poly16x4_t vec", + "int64x1_t vec", "const int lane" ], "return_type": { - "value": "poly16x4_t" + "value": "int64_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 3 + "maximum": 0 }, "vec": { - "register": "Vn.4H" + "register": "Vn.1D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20565,13 +26353,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_p64", + "name": "vdupd_lane_u64", "arguments": [ - "poly64x1_t vec", + "uint64x1_t vec", "const int lane" ], "return_type": { - "value": "poly64x1_t" + "value": "uint64_t" }, "Arguments_Preparation": { "lane": { @@ -20583,7 +26371,6 @@ } }, "Architectures": [ - "A32", "A64" ], "instructions": [ @@ -20594,26 +26381,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_p8", + "name": "vdupd_laneq_f64", "arguments": [ - "poly8x8_t vec", + "float64x2_t vec", "const int lane" ], "return_type": { - "value": "poly8x8_t" + "value": "float64_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 7 + "maximum": 1 }, "vec": { - "register": "Vn.8B" + "register": "Vn.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20624,26 +26409,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_s16", + "name": "vdupd_laneq_s64", "arguments": [ - "int16x4_t vec", + "int64x2_t vec", "const int lane" ], "return_type": { - "value": "int16x4_t" + "value": "int64_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 3 + "maximum": 1 }, "vec": { - "register": "Vn.4H" + "register": "Vn.2D" } }, 
"Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20654,13 +26437,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_s32", + "name": "vdupd_laneq_u64", "arguments": [ - "int32x2_t vec", + "uint64x2_t vec", "const int lane" ], "return_type": { - "value": "int32x2_t" + "value": "uint64_t" }, "Arguments_Preparation": { "lane": { @@ -20668,12 +26451,10 @@ "maximum": 1 }, "vec": { - "register": "Vn.2S" + "register": "Vn.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20684,26 +26465,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_s64", + "name": "vduph_lane_f16", "arguments": [ - "int64x1_t vec", + "float16x4_t vec", "const int lane" ], "return_type": { - "value": "int64x1_t" + "value": "float16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 0 + "maximum": 3 }, "vec": { - "register": "Vn.1D" + "register": "Vn.4H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20714,26 +26493,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_s8", + "name": "vduph_lane_p16", "arguments": [ - "int8x8_t vec", + "poly16x4_t vec", "const int lane" ], "return_type": { - "value": "int8x8_t" + "value": "poly16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 7 + "maximum": 3 }, "vec": { - "register": "Vn.8B" + "register": "Vn.4H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20744,13 +26521,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_u16", + "name": "vduph_lane_s16", "arguments": [ - "uint16x4_t vec", + "int16x4_t vec", "const int lane" ], "return_type": { - "value": "uint16x4_t" + "value": "int16_t" }, "Arguments_Preparation": { "lane": { @@ -20762,8 +26539,6 @@ } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20774,26 +26549,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_u32", + "name": "vduph_lane_u16", "arguments": [ - "uint32x2_t vec", + "uint16x4_t vec", "const int lane" ], "return_type": { - "value": 
"uint32x2_t" + "value": "uint16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 1 + "maximum": 3 }, "vec": { - "register": "Vn.2S" + "register": "Vn.4H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20804,26 +26577,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_u64", + "name": "vduph_laneq_f16", "arguments": [ - "uint64x1_t vec", + "float16x8_t vec", "const int lane" ], "return_type": { - "value": "uint64x1_t" + "value": "float16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 0 + "maximum": 7 }, "vec": { - "register": "Vn.1D" + "register": "Vn.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20834,13 +26605,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_lane_u8", + "name": "vduph_laneq_p16", "arguments": [ - "uint8x8_t vec", + "poly16x8_t vec", "const int lane" ], "return_type": { - "value": "uint8x8_t" + "value": "poly16_t" }, "Arguments_Preparation": { "lane": { @@ -20848,12 +26619,10 @@ "maximum": 7 }, "vec": { - "register": "Vn.8B" + "register": "Vn.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -20864,21 +26633,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_f32", + "name": "vduph_laneq_s16", "arguments": [ - "float32x4_t vec", + "int16x8_t vec", "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "int16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 3 + "maximum": 7 }, "vec": { - "register": "Vn.4S" + "register": "Vn.8H" } }, "Architectures": [ @@ -20892,21 +26661,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_f64", + "name": "vduph_laneq_u16", "arguments": [ - "float64x2_t vec", + "uint16x8_t vec", "const int lane" ], "return_type": { - "value": "float64x1_t" + "value": "uint16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 1 + "maximum": 7 }, "vec": { - "register": "Vn.2D" + "register": "Vn.8H" } }, "Architectures": [ @@ -20920,24 +26689,26 @@ }, { 
"SIMD_ISA": "Neon", - "name": "vdup_laneq_p16", + "name": "vdupq_lane_f16", "arguments": [ - "poly16x8_t vec", + "float16x4_t vec", "const int lane" ], "return_type": { - "value": "poly16x4_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 7 + "maximum": 3 }, "vec": { - "register": "Vn.8H" + "register": "Vn.4H" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -20948,13 +26719,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_p64", + "name": "vdupq_lane_f32", "arguments": [ - "poly64x2_t vec", + "float32x2_t vec", "const int lane" ], "return_type": { - "value": "poly64x1_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "lane": { @@ -20962,10 +26733,12 @@ "maximum": 1 }, "vec": { - "register": "Vn.2D" + "register": "Vn.2S" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -20976,21 +26749,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_p8", + "name": "vdupq_lane_f64", "arguments": [ - "poly8x16_t vec", + "float64x1_t vec", "const int lane" ], "return_type": { - "value": "poly8x8_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 15 + "maximum": 0 }, "vec": { - "register": "Vn.16B" + "register": "Vn.1D" } }, "Architectures": [ @@ -21004,24 +26777,26 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_s16", + "name": "vdupq_lane_p16", "arguments": [ - "int16x8_t vec", + "poly16x4_t vec", "const int lane" ], "return_type": { - "value": "int16x4_t" + "value": "poly16x8_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 7 + "maximum": 3 }, "vec": { - "register": "Vn.8H" + "register": "Vn.4H" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21032,24 +26807,25 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_s32", + "name": "vdupq_lane_p64", "arguments": [ - "int32x4_t vec", + "poly64x1_t vec", "const int lane" ], "return_type": { - "value": "int32x2_t" + "value": "poly64x2_t" }, 
"Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 3 + "maximum": 0 }, "vec": { - "register": "Vn.4S" + "register": "Vn.1D" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ @@ -21060,24 +26836,26 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_s64", + "name": "vdupq_lane_p8", "arguments": [ - "int64x2_t vec", + "poly8x8_t vec", "const int lane" ], "return_type": { - "value": "int64x1_t" + "value": "poly8x16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 1 + "maximum": 7 }, "vec": { - "register": "Vn.2D" + "register": "Vn.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21088,24 +26866,26 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_s8", + "name": "vdupq_lane_s16", "arguments": [ - "int8x16_t vec", + "int16x4_t vec", "const int lane" ], "return_type": { - "value": "int8x8_t" + "value": "int16x8_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 15 + "maximum": 3 }, "vec": { - "register": "Vn.16B" + "register": "Vn.4H" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21116,24 +26896,26 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_u16", + "name": "vdupq_lane_s32", "arguments": [ - "uint16x8_t vec", + "int32x2_t vec", "const int lane" ], "return_type": { - "value": "uint16x4_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 7 + "maximum": 1 }, "vec": { - "register": "Vn.8H" + "register": "Vn.2S" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21144,24 +26926,26 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_u32", + "name": "vdupq_lane_s64", "arguments": [ - "uint32x4_t vec", + "int64x1_t vec", "const int lane" ], "return_type": { - "value": "uint32x2_t" + "value": "int64x2_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 3 + "maximum": 0 }, "vec": { - "register": "Vn.4S" + "register": "Vn.1D" } }, "Architectures": [ + "v7", + "A32", "A64" ], 
"instructions": [ @@ -21172,24 +26956,26 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_u64", + "name": "vdupq_lane_s8", "arguments": [ - "uint64x2_t vec", + "int8x8_t vec", "const int lane" ], "return_type": { - "value": "uint64x1_t" + "value": "int8x16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 1 + "maximum": 7 }, "vec": { - "register": "Vn.2D" + "register": "Vn.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21200,24 +26986,26 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_laneq_u8", + "name": "vdupq_lane_u16", "arguments": [ - "uint8x16_t vec", + "uint16x4_t vec", "const int lane" ], "return_type": { - "value": "uint8x8_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 15 + "maximum": 3 }, "vec": { - "register": "Vn.16B" + "register": "Vn.4H" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21228,16 +27016,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_f32", + "name": "vdupq_lane_u32", "arguments": [ - "float32_t value" + "uint32x2_t vec", + "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2S" } }, "Architectures": [ @@ -21253,39 +27046,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_f64", - "arguments": [ - "float64_t value" - ], - "return_type": { - "value": "float64x1_t" - }, - "Arguments_Preparation": { - "value": { - "register": "rn" - } - }, - "Architectures": [ - "A64" - ], - "instructions": [ - [ - "INS" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vdup_n_p16", + "name": "vdupq_lane_u64", "arguments": [ - "poly16_t value" + "uint64x1_t vec", + "const int lane" ], "return_type": { - "value": "poly16x4_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 0 + }, + "vec": { + 
"register": "Vn.1D" } }, "Architectures": [ @@ -21301,45 +27076,54 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_p64", + "name": "vdupq_lane_u8", "arguments": [ - "poly64_t value" + "uint8x8_t vec", + "const int lane" ], "return_type": { - "value": "poly64x1_t" + "value": "uint8x16_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8B" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "INS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdup_n_p8", + "name": "vdupq_laneq_f16", "arguments": [ - "poly8_t value" + "float16x8_t vec", + "const int lane" ], "return_type": { - "value": "poly8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -21350,21 +27134,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_s16", + "name": "vdupq_laneq_f32", "arguments": [ - "int16_t value" + "float32x4_t vec", + "const int lane" ], "return_type": { - "value": "int16x4_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -21375,21 +27162,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_s32", + "name": "vdupq_laneq_f64", "arguments": [ - "int32_t value" + "float64x2_t vec", + "const int lane" ], "return_type": { - "value": "int32x2_t" + "value": "float64x2_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" } }, - "Architectures": [ - "v7", - "A32", + "Architectures": [ "A64" ], "instructions": [ @@ -21400,46 +27190,52 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_s64", + "name": "vdupq_laneq_p16", "arguments": [ 
- "int64_t value" + "poly16x8_t vec", + "const int lane" ], "return_type": { - "value": "int64x1_t" + "value": "poly16x8_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "INS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdup_n_s8", + "name": "vdupq_laneq_p64", "arguments": [ - "int8_t value" + "poly64x2_t vec", + "const int lane" ], "return_type": { - "value": "int8x8_t" + "value": "poly64x2_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -21450,21 +27246,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_u16", + "name": "vdupq_laneq_p8", "arguments": [ - "uint16_t value" + "poly8x16_t vec", + "const int lane" ], "return_type": { - "value": "uint16x4_t" + "value": "poly8x16_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 15 + }, + "vec": { + "register": "Vn.16B" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -21475,21 +27274,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_u32", + "name": "vdupq_laneq_s16", "arguments": [ - "uint32_t value" + "int16x8_t vec", + "const int lane" ], "return_type": { - "value": "uint32x2_t" + "value": "int16x8_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 7 + }, + "vec": { + "register": "Vn.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -21500,46 +27302,52 @@ }, { "SIMD_ISA": "Neon", - "name": "vdup_n_u64", + "name": "vdupq_laneq_s32", "arguments": [ - "uint64_t value" + "int32x4_t vec", + "const int lane" ], "return_type": { - "value": "uint64x1_t" + "value": "int32x4_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + 
"lane": { + "minimum": 0, + "maximum": 3 + }, + "vec": { + "register": "Vn.4S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "INS" + "DUP" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdup_n_u8", + "name": "vdupq_laneq_s64", "arguments": [ - "uint8_t value" + "int64x2_t vec", + "const int lane" ], "return_type": { - "value": "uint8x8_t" + "value": "int64x2_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "lane": { + "minimum": 0, + "maximum": 1 + }, + "vec": { + "register": "Vn.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -21550,21 +27358,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupb_lane_p8", + "name": "vdupq_laneq_s8", "arguments": [ - "poly8x8_t vec", + "int8x16_t vec", "const int lane" ], "return_type": { - "value": "poly8_t" + "value": "int8x16_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 7 + "maximum": 15 }, "vec": { - "register": "Vn.8B" + "register": "Vn.16B" } }, "Architectures": [ @@ -21578,13 +27386,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupb_lane_s8", + "name": "vdupq_laneq_u16", "arguments": [ - "int8x8_t vec", + "uint16x8_t vec", "const int lane" ], "return_type": { - "value": "int8_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { "lane": { @@ -21592,7 +27400,7 @@ "maximum": 7 }, "vec": { - "register": "Vn.8B" + "register": "Vn.8H" } }, "Architectures": [ @@ -21606,21 +27414,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupb_lane_u8", + "name": "vdupq_laneq_u32", "arguments": [ - "uint8x8_t vec", + "uint32x4_t vec", "const int lane" ], "return_type": { - "value": "uint8_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 7 + "maximum": 3 }, "vec": { - "register": "Vn.8B" + "register": "Vn.4S" } }, "Architectures": [ @@ -21634,21 +27442,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupb_laneq_p8", + "name": "vdupq_laneq_u64", "arguments": [ - "poly8x16_t vec", + "uint64x2_t vec", "const int lane" ], 
"return_type": { - "value": "poly8_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 15 + "maximum": 1 }, "vec": { - "register": "Vn.16B" + "register": "Vn.2D" } }, "Architectures": [ @@ -21662,13 +27470,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupb_laneq_s8", + "name": "vdupq_laneq_u8", "arguments": [ - "int8x16_t vec", + "uint8x16_t vec", "const int lane" ], "return_type": { - "value": "int8_t" + "value": "uint8x16_t" }, "Arguments_Preparation": { "lane": { @@ -21690,24 +27498,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupb_laneq_u8", + "name": "vdupq_n_f16", "arguments": [ - "uint8x16_t vec", - "const int lane" + "float16_t value" ], "return_type": { - "value": "uint8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 15 - }, - "vec": { - "register": "Vn.16B" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21718,24 +27523,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupd_lane_f64", + "name": "vdupq_n_f32", "arguments": [ - "float64x1_t vec", - "const int lane" + "float32_t value" ], "return_type": { - "value": "float64_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 0 - }, - "vec": { - "register": "Vn.1D" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21746,21 +27548,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupd_lane_s64", + "name": "vdupq_n_f64", "arguments": [ - "int64x1_t vec", - "const int lane" + "float64_t value" ], "return_type": { - "value": "int64_t" + "value": "float64x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 0 - }, - "vec": { - "register": "Vn.1D" + "value": { + "register": "rn" } }, "Architectures": [ @@ -21774,24 +27571,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupd_lane_u64", + "name": "vdupq_n_p16", "arguments": [ - "uint64x1_t vec", - "const int lane" + 
"poly16_t value" ], "return_type": { - "value": "uint64_t" + "value": "poly16x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 0 - }, - "vec": { - "register": "Vn.1D" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21802,24 +27596,20 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupd_laneq_f64", + "name": "vdupq_n_p64", "arguments": [ - "float64x2_t vec", - "const int lane" + "poly64_t value" ], "return_type": { - "value": "float64_t" + "value": "poly64x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 - }, - "vec": { - "register": "Vn.2D" + "value": { + "register": "rn" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ @@ -21830,24 +27620,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupd_laneq_s64", + "name": "vdupq_n_p8", "arguments": [ - "int64x2_t vec", - "const int lane" + "poly8_t value" ], "return_type": { - "value": "int64_t" + "value": "poly8x16_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 - }, - "vec": { - "register": "Vn.2D" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21858,24 +27645,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupd_laneq_u64", + "name": "vdupq_n_s16", "arguments": [ - "uint64x2_t vec", - "const int lane" + "int16_t value" ], "return_type": { - "value": "uint64_t" + "value": "int16x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 - }, - "vec": { - "register": "Vn.2D" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21886,24 +27670,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vduph_lane_p16", + "name": "vdupq_n_s32", "arguments": [ - "poly16x4_t vec", - "const int lane" + "int32_t value" ], "return_type": { - "value": "poly16_t" + "value": "int32x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 - }, - "vec": { - "register": "Vn.4H" 
+ "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21914,24 +27695,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vduph_lane_s16", + "name": "vdupq_n_s64", "arguments": [ - "int16x4_t vec", - "const int lane" + "int64_t value" ], "return_type": { - "value": "int16_t" + "value": "int64x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 - }, - "vec": { - "register": "Vn.4H" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21942,24 +27720,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vduph_lane_u16", + "name": "vdupq_n_s8", "arguments": [ - "uint16x4_t vec", - "const int lane" + "int8_t value" ], "return_type": { - "value": "uint16_t" + "value": "int8x16_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 - }, - "vec": { - "register": "Vn.4H" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21970,24 +27745,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vduph_laneq_p16", + "name": "vdupq_n_u16", "arguments": [ - "poly16x8_t vec", - "const int lane" + "uint16_t value" ], "return_type": { - "value": "poly16_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 7 - }, - "vec": { - "register": "Vn.8H" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -21998,24 +27770,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vduph_laneq_s16", + "name": "vdupq_n_u32", "arguments": [ - "int16x8_t vec", - "const int lane" + "uint32_t value" ], "return_type": { - "value": "int16_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 7 - }, - "vec": { - "register": "Vn.8H" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -22026,24 +27795,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vduph_laneq_u16", + "name": 
"vdupq_n_u64", "arguments": [ - "uint16x8_t vec", - "const int lane" + "uint64_t value" ], "return_type": { - "value": "uint16_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 7 - }, - "vec": { - "register": "Vn.8H" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -22054,21 +27820,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_f32", + "name": "vdupq_n_u8", "arguments": [ - "float32x2_t vec", - "const int lane" + "uint8_t value" ], "return_type": { - "value": "float32x4_t" + "value": "uint8x16_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 - }, - "vec": { - "register": "Vn.2S" + "value": { + "register": "rn" } }, "Architectures": [ @@ -22084,21 +27845,21 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_f64", + "name": "vdups_lane_f32", "arguments": [ - "float64x1_t vec", + "float32x2_t vec", "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "float32_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 }, "vec": { - "register": "Vn.1D" + "register": "Vn.2S" } }, "Architectures": [ @@ -22112,26 +27873,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_p16", + "name": "vdups_lane_s32", "arguments": [ - "poly16x4_t vec", + "int32x2_t vec", "const int lane" ], "return_type": { - "value": "poly16x8_t" + "value": "int32_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 3 + "maximum": 1 }, "vec": { - "register": "Vn.4H" + "register": "Vn.2S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -22142,25 +27901,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_p64", + "name": "vdups_lane_u32", "arguments": [ - "poly64x1_t vec", + "uint32x2_t vec", "const int lane" ], "return_type": { - "value": "poly64x2_t" + "value": "uint32_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 }, "vec": { - "register": 
"Vn.1D" + "register": "Vn.2S" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ @@ -22171,26 +27929,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_p8", + "name": "vdups_laneq_f32", "arguments": [ - "poly8x8_t vec", + "float32x4_t vec", "const int lane" ], "return_type": { - "value": "poly8x16_t" + "value": "float32_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 7 + "maximum": 3 }, "vec": { - "register": "Vn.8B" + "register": "Vn.4S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -22201,13 +27957,13 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_s16", + "name": "vdups_laneq_s32", "arguments": [ - "int16x4_t vec", + "int32x4_t vec", "const int lane" ], "return_type": { - "value": "int16x8_t" + "value": "int32_t" }, "Arguments_Preparation": { "lane": { @@ -22215,12 +27971,10 @@ "maximum": 3 }, "vec": { - "register": "Vn.4H" + "register": "Vn.4S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -22231,26 +27985,24 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_s32", + "name": "vdups_laneq_u32", "arguments": [ - "int32x2_t vec", + "uint32x4_t vec", "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "uint32_t" }, "Arguments_Preparation": { "lane": { "minimum": 0, - "maximum": 1 + "maximum": 3 }, "vec": { - "register": "Vn.2S" + "register": "Vn.4S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -22261,560 +28013,555 @@ }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_s64", + "name": "veor3q_s16", "arguments": [ - "int64x1_t vec", - "const int lane" + "int16x8_t a", + "int16x8_t b", + "int16x8_t c" ], "return_type": { - "value": "int64x2_t" + "value": "int16x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 0 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.1D" - } + "b": {}, + "c": {} }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR3" ] ] }, { "SIMD_ISA": "Neon", - 
"name": "vdupq_lane_s8", + "name": "veor3q_s32", "arguments": [ - "int8x8_t vec", - "const int lane" + "int32x4_t a", + "int32x4_t b", + "int32x4_t c" ], "return_type": { - "value": "int8x16_t" + "value": "int32x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 7 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.8B" - } + "b": {}, + "c": {} }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR3" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_u16", + "name": "veor3q_s64", "arguments": [ - "uint16x4_t vec", - "const int lane" + "int64x2_t a", + "int64x2_t b", + "int64x2_t c" ], "return_type": { - "value": "uint16x8_t" + "value": "int64x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 - }, - "vec": { - "register": "Vn.4H" - } + "a": { + "register": "Vn.16B" + }, + "b": {}, + "c": {} }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR3" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_u32", + "name": "veor3q_s8", "arguments": [ - "uint32x2_t vec", - "const int lane" + "int8x16_t a", + "int8x16_t b", + "int8x16_t c" ], "return_type": { - "value": "uint32x4_t" + "value": "int8x16_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.2S" - } + "b": {}, + "c": {} }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR3" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_lane_u64", + "name": "veor3q_u16", "arguments": [ - "uint64x1_t vec", - "const int lane" + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t c" ], "return_type": { - "value": "uint64x2_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 0 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.1D" - } + "b": {}, + "c": {} }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR3" ] ] }, { 
"SIMD_ISA": "Neon", - "name": "vdupq_lane_u8", + "name": "veor3q_u32", "arguments": [ - "uint8x8_t vec", - "const int lane" + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" ], "return_type": { - "value": "uint8x16_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 7 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.8B" - } + "b": {}, + "c": {} }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR3" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_f32", + "name": "veor3q_u64", "arguments": [ - "float32x4_t vec", - "const int lane" + "uint64x2_t a", + "uint64x2_t b", + "uint64x2_t c" ], "return_type": { - "value": "float32x4_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.4S" - } + "b": {}, + "c": {} }, "Architectures": [ "A64" ], "instructions": [ [ - "DUP" + "EOR3" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_f64", + "name": "veor3q_u8", "arguments": [ - "float64x2_t vec", - "const int lane" + "uint8x16_t a", + "uint8x16_t b", + "uint8x16_t c" ], "return_type": { - "value": "float64x2_t" + "value": "uint8x16_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.2D" - } + "b": {}, + "c": {} }, "Architectures": [ "A64" ], "instructions": [ [ - "DUP" + "EOR3" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_p16", + "name": "veor_s16", "arguments": [ - "poly16x8_t vec", - "const int lane" + "int16x4_t a", + "int16x4_t b" ], "return_type": { - "value": "poly16x8_t" + "value": "int16x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 7 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.8H" + "b": { + "register": "Vm.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": 
"Neon", - "name": "vdupq_laneq_p64", + "name": "veor_s32", "arguments": [ - "poly64x2_t vec", - "const int lane" + "int32x2_t a", + "int32x2_t b" ], "return_type": { - "value": "poly64x2_t" + "value": "int32x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.2D" + "b": { + "register": "Vm.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_p8", + "name": "veor_s64", "arguments": [ - "poly8x16_t vec", - "const int lane" + "int64x1_t a", + "int64x1_t b" ], "return_type": { - "value": "poly8x16_t" + "value": "int64x1_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 15 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.16B" + "b": { + "register": "Vm.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_s16", + "name": "veor_s8", "arguments": [ - "int16x8_t vec", - "const int lane" + "int8x8_t a", + "int8x8_t b" ], "return_type": { - "value": "int16x8_t" + "value": "int8x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 7 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.8H" + "b": { + "register": "Vm.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_s32", + "name": "veor_u16", "arguments": [ - "int32x4_t vec", - "const int lane" + "uint16x4_t a", + "uint16x4_t b" ], "return_type": { - "value": "int32x4_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.4S" + "b": { + "register": "Vm.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_s64", + 
"name": "veor_u32", "arguments": [ - "int64x2_t vec", - "const int lane" + "uint32x2_t a", + "uint32x2_t b" ], "return_type": { - "value": "int64x2_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.2D" + "b": { + "register": "Vm.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_s8", + "name": "veor_u64", "arguments": [ - "int8x16_t vec", - "const int lane" + "uint64x1_t a", + "uint64x1_t b" ], "return_type": { - "value": "int8x16_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 15 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.16B" + "b": { + "register": "Vm.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_u16", + "name": "veor_u8", "arguments": [ - "uint16x8_t vec", - "const int lane" + "uint8x8_t a", + "uint8x8_t b" ], "return_type": { - "value": "uint16x8_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 7 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.8H" + "b": { + "register": "Vm.8B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_u32", + "name": "veorq_s16", "arguments": [ - "uint32x4_t vec", - "const int lane" + "int16x8_t a", + "int16x8_t b" ], "return_type": { - "value": "uint32x4_t" + "value": "int16x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.4S" + "b": { + "register": "Vm.16B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_u64", + "name": "veorq_s32", 
"arguments": [ - "uint64x2_t vec", - "const int lane" + "int32x4_t a", + "int32x4_t b" ], "return_type": { - "value": "uint64x2_t" + "value": "int32x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 + "a": { + "register": "Vn.16B" }, - "vec": { - "register": "Vn.2D" + "b": { + "register": "Vm.16B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_laneq_u8", + "name": "veorq_s64", "arguments": [ - "uint8x16_t vec", - "const int lane" + "int64x2_t a", + "int64x2_t b" ], "return_type": { - "value": "uint8x16_t" + "value": "int64x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 15 - }, - "vec": { + "a": { "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_f32", + "name": "veorq_s8", "arguments": [ - "float32_t value" + "int8x16_t a", + "int8x16_t b" ], "return_type": { - "value": "float32x4_t" + "value": "int8x16_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" } }, "Architectures": [ @@ -22824,45 +28571,55 @@ ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_f64", + "name": "veorq_u16", "arguments": [ - "float64_t value" + "uint16x8_t a", + "uint16x8_t b" ], "return_type": { - "value": "float64x2_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_p16", + "name": "veorq_u32", "arguments": [ - "poly16_t value" + "uint32x4_t a", + "uint32x4_t b" ], "return_type": { - "value": "poly16x8_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { 
- "value": { - "register": "rn" + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" } }, "Architectures": [ @@ -22872,46 +28629,55 @@ ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_p64", + "name": "veorq_u64", "arguments": [ - "poly64_t value" + "uint64x2_t a", + "uint64x2_t b" ], "return_type": { - "value": "poly64x2_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_p8", + "name": "veorq_u8", "arguments": [ - "poly8_t value" + "uint8x16_t a", + "uint8x16_t b" ], "return_type": { - "value": "poly8x16_t" + "value": "uint8x16_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm.16B" } }, "Architectures": [ @@ -22921,22 +28687,31 @@ ], "instructions": [ [ - "DUP" + "EOR" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_s16", + "name": "vext_f16", "arguments": [ - "int16_t value" + "float16x4_t a", + "float16x4_t b", + "const int n" ], "return_type": { - "value": "int16x8_t" + "value": "float16x4_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 3 } }, "Architectures": [ @@ -22946,22 +28721,31 @@ ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_s32", + "name": "vext_f32", "arguments": [ - "int32_t value" + "float32x2_t a", + "float32x2_t b", + "const int n" ], "return_type": { - "value": "int32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 1 } }, "Architectures": [ @@ -22971,47 +28755,63 @@ 
], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_s64", + "name": "vext_f64", "arguments": [ - "int64_t value" + "float64x1_t a", + "float64x1_t b", + "const int n" ], "return_type": { - "value": "int64x2_t" + "value": "float64x1_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 0 } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_s8", + "name": "vext_p16", "arguments": [ - "int8_t value" + "poly16x4_t a", + "poly16x4_t b", + "const int n" ], "return_type": { - "value": "int8x16_t" + "value": "poly16x4_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 3 } }, "Architectures": [ @@ -23021,47 +28821,64 @@ ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_u16", + "name": "vext_p64", "arguments": [ - "uint16_t value" + "poly64x1_t a", + "poly64x1_t b", + "const int n" ], "return_type": { - "value": "uint16x8_t" + "value": "poly64x1_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 0 } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_u32", + "name": "vext_p8", "arguments": [ - "uint32_t value" + "poly8x8_t a", + "poly8x8_t b", + "const int n" ], "return_type": { - "value": "uint32x4_t" + "value": "poly8x8_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 } }, "Architectures": [ @@ -23071,22 +28888,31 @@ ], "instructions": [ [ - "DUP" + "EXT" ] ] 
}, { "SIMD_ISA": "Neon", - "name": "vdupq_n_u64", + "name": "vext_s16", "arguments": [ - "uint64_t value" + "int16x4_t a", + "int16x4_t b", + "const int n" ], "return_type": { - "value": "uint64x2_t" + "value": "int16x4_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 3 } }, "Architectures": [ @@ -23096,22 +28922,31 @@ ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdupq_n_u8", + "name": "vext_s32", "arguments": [ - "uint8_t value" + "int32x2_t a", + "int32x2_t b", + "const int n" ], "return_type": { - "value": "uint8x16_t" + "value": "int32x2_t" }, "Arguments_Preparation": { - "value": { - "register": "rn" + "a": { + "register": "Vn.8B" + }, + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 1 } }, "Architectures": [ @@ -23121,410 +28956,504 @@ ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdups_lane_f32", + "name": "vext_s64", "arguments": [ - "float32x2_t vec", - "const int lane" + "int64x1_t a", + "int64x1_t b", + "const int n" ], "return_type": { - "value": "float32_t" + "value": "int64x1_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.2S" + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 0 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdups_lane_s32", + "name": "vext_s8", "arguments": [ - "int32x2_t vec", - "const int lane" + "int8x8_t a", + "int8x8_t b", + "const int n" ], "return_type": { - "value": "int32_t" + "value": "int8x8_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.2S" + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 } }, 
"Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdups_lane_u32", + "name": "vext_u16", "arguments": [ - "uint32x2_t vec", - "const int lane" + "uint16x4_t a", + "uint16x4_t b", + "const int n" ], "return_type": { - "value": "uint32_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 1 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.2S" + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 3 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdups_laneq_f32", + "name": "vext_u32", "arguments": [ - "float32x4_t vec", - "const int lane" + "uint32x2_t a", + "uint32x2_t b", + "const int n" ], "return_type": { - "value": "float32_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.4S" + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 1 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdups_laneq_s32", + "name": "vext_u64", "arguments": [ - "int32x4_t vec", - "const int lane" + "uint64x1_t a", + "uint64x1_t b", + "const int n" ], "return_type": { - "value": "int32_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.4S" + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 0 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "vdups_laneq_u32", + "name": "vext_u8", "arguments": [ - "uint32x4_t vec", - "const int lane" + "uint8x8_t a", + "uint8x8_t b", + "const int n" ], "return_type": { - "value": "uint32_t" + "value": "uint8x8_t" }, 
"Arguments_Preparation": { - "lane": { - "minimum": 0, - "maximum": 3 + "a": { + "register": "Vn.8B" }, - "vec": { - "register": "Vn.4S" + "b": { + "register": "Vm.8B" + }, + "n": { + "minimum": 0, + "maximum": 7 } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "DUP" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor3q_s16", + "name": "vextq_f16", "arguments": [ - "int16x8_t a", - "int16x8_t b", - "int16x8_t c" + "float16x8_t a", + "float16x8_t b", + "const int n" ], "return_type": { - "value": "int16x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { "register": "Vn.16B" }, - "b": {}, - "c": {} + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "EOR3" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor3q_s32", + "name": "vextq_f32", "arguments": [ - "int32x4_t a", - "int32x4_t b", - "int32x4_t c" + "float32x4_t a", + "float32x4_t b", + "const int n" ], "return_type": { - "value": "int32x4_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { "register": "Vn.16B" }, - "b": {}, - "c": {} + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "EOR3" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor3q_s64", + "name": "vextq_f64", "arguments": [ - "int64x2_t a", - "int64x2_t b", - "int64x2_t c" + "float64x2_t a", + "float64x2_t b", + "const int n" ], "return_type": { - "value": "int64x2_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { "register": "Vn.16B" }, - "b": {}, - "c": {} + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } }, "Architectures": [ "A64" ], "instructions": [ [ - "EOR3" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor3q_s8", + "name": "vextq_p16", "arguments": [ - "int8x16_t a", - "int8x16_t b", - "int8x16_t c" + "poly16x8_t a", + "poly16x8_t b", + "const 
int n" ], "return_type": { - "value": "int8x16_t" + "value": "poly16x8_t" }, "Arguments_Preparation": { "a": { "register": "Vn.16B" }, - "b": {}, - "c": {} + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "EOR3" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor3q_u16", + "name": "vextq_p64", "arguments": [ - "uint16x8_t a", - "uint16x8_t b", - "uint16x8_t c" + "poly64x2_t a", + "poly64x2_t b", + "const int n" ], "return_type": { - "value": "uint16x8_t" + "value": "poly64x2_t" }, "Arguments_Preparation": { "a": { "register": "Vn.16B" }, - "b": {}, - "c": {} + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 1 + } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "EOR3" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor3q_u32", + "name": "vextq_p8", "arguments": [ - "uint32x4_t a", - "uint32x4_t b", - "uint32x4_t c" + "poly8x16_t a", + "poly8x16_t b", + "const int n" ], "return_type": { - "value": "uint32x4_t" + "value": "poly8x16_t" }, "Arguments_Preparation": { "a": { "register": "Vn.16B" }, - "b": {}, - "c": {} + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 15 + } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "EOR3" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor3q_u64", + "name": "vextq_s16", "arguments": [ - "uint64x2_t a", - "uint64x2_t b", - "uint64x2_t c" + "int16x8_t a", + "int16x8_t b", + "const int n" ], "return_type": { - "value": "uint64x2_t" + "value": "int16x8_t" }, "Arguments_Preparation": { "a": { "register": "Vn.16B" }, - "b": {}, - "c": {} + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 + } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "EOR3" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor3q_u8", + "name": "vextq_s32", "arguments": [ - "uint8x16_t a", - "uint8x16_t b", - "uint8x16_t c" + 
"int32x4_t a", + "int32x4_t b", + "const int n" ], "return_type": { - "value": "uint8x16_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "a": { "register": "Vn.16B" }, - "b": {}, - "c": {} + "b": { + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 3 + } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ [ - "EOR3" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor_s16", + "name": "vextq_s64", "arguments": [ - "int16x4_t a", - "int16x4_t b" + "int64x2_t a", + "int64x2_t b", + "const int n" ], "return_type": { - "value": "int16x4_t" + "value": "int64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vn.16B" }, "b": { - "register": "Vm.8B" + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 1 } }, "Architectures": [ @@ -23534,26 +29463,31 @@ ], "instructions": [ [ - "EOR" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor_s32", + "name": "vextq_s8", "arguments": [ - "int32x2_t a", - "int32x2_t b" + "int8x16_t a", + "int8x16_t b", + "const int n" ], "return_type": { - "value": "int32x2_t" + "value": "int8x16_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vn.16B" }, "b": { - "register": "Vm.8B" + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 15 } }, "Architectures": [ @@ -23563,26 +29497,31 @@ ], "instructions": [ [ - "EOR" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor_s64", + "name": "vextq_u16", "arguments": [ - "int64x1_t a", - "int64x1_t b" + "uint16x8_t a", + "uint16x8_t b", + "const int n" ], "return_type": { - "value": "int64x1_t" + "value": "uint16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vn.16B" }, "b": { - "register": "Vm.8B" + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 7 } }, "Architectures": [ @@ -23592,26 +29531,31 @@ ], "instructions": [ [ - "EOR" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor_s8", + "name": "vextq_u32", "arguments": [ - "int8x8_t a", - 
"int8x8_t b" + "uint32x4_t a", + "uint32x4_t b", + "const int n" ], "return_type": { - "value": "int8x8_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vn.16B" }, "b": { - "register": "Vm.8B" + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 3 } }, "Architectures": [ @@ -23621,26 +29565,31 @@ ], "instructions": [ [ - "EOR" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor_u16", + "name": "vextq_u64", "arguments": [ - "uint16x4_t a", - "uint16x4_t b" + "uint64x2_t a", + "uint64x2_t b", + "const int n" ], "return_type": { - "value": "uint16x4_t" + "value": "uint64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vn.16B" }, "b": { - "register": "Vm.8B" + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 1 } }, "Architectures": [ @@ -23650,26 +29599,31 @@ ], "instructions": [ [ - "EOR" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor_u32", + "name": "vextq_u8", "arguments": [ - "uint32x2_t a", - "uint32x2_t b" + "uint8x16_t a", + "uint8x16_t b", + "const int n" ], "return_type": { - "value": "uint32x2_t" + "value": "uint8x16_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vn.16B" }, "b": { - "register": "Vm.8B" + "register": "Vm.16B" + }, + "n": { + "minimum": 0, + "maximum": 15 } }, "Architectures": [ @@ -23679,55 +29633,62 @@ ], "instructions": [ [ - "EOR" + "EXT" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor_u64", + "name": "vfma_f16", "arguments": [ - "uint64x1_t a", - "uint64x1_t b" + "float16x4_t a", + "float16x4_t b", + "float16x4_t c" ], "return_type": { - "value": "uint64x1_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.4H" }, "b": { - "register": "Vm.8B" + "register": "Vn.4H" + }, + "c": { + "register": "Vm.4H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "veor_u8", + "name": 
"vfma_f32", "arguments": [ - "uint8x8_t a", - "uint8x8_t b" + "float32x2_t a", + "float32x2_t b", + "float32x2_t c" ], "return_type": { - "value": "uint8x8_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.2S" }, "b": { - "register": "Vm.8B" + "register": "Vn.2S" + }, + "c": { + "register": "Vm.2S" } }, "Architectures": [ @@ -23737,263 +29698,308 @@ ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "veorq_s16", + "name": "vfma_f64", "arguments": [ - "int16x8_t a", - "int16x8_t b" + "float64x1_t a", + "float64x1_t b", + "float64x1_t c" ], "return_type": { - "value": "int16x8_t" + "value": "float64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Da" }, "b": { - "register": "Vm.16B" + "register": "Dn" + }, + "c": { + "register": "Dm" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EOR" + "FMADD" ] ] }, { "SIMD_ISA": "Neon", - "name": "veorq_s32", + "name": "vfma_lane_f16", "arguments": [ - "int32x4_t a", - "int32x4_t b" + "float16x4_t a", + "float16x4_t b", + "float16x4_t v", + "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.4H" }, "b": { - "register": "Vm.16B" + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "veorq_s64", + "name": "vfma_lane_f32", "arguments": [ - "int64x2_t a", - "int64x2_t b" + "float32x2_t a", + "float32x2_t b", + "float32x2_t v", + "const int lane" ], "return_type": { - "value": "int64x2_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.2S" }, "b": { - "register": "Vm.16B" + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + 
"register": "Vm.2S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "veorq_s8", + "name": "vfma_lane_f64", "arguments": [ - "int8x16_t a", - "int8x16_t b" + "float64x1_t a", + "float64x1_t b", + "float64x1_t v", + "const int lane" ], "return_type": { - "value": "int8x16_t" + "value": "float64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Dd" }, "b": { - "register": "Vm.16B" + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vm.1D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "veorq_u16", + "name": "vfma_laneq_f16", "arguments": [ - "uint16x8_t a", - "uint16x8_t b" + "float16x4_t a", + "float16x4_t b", + "float16x8_t v", + "const int lane" ], "return_type": { - "value": "uint16x8_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.4H" }, "b": { - "register": "Vm.16B" + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "veorq_u32", + "name": "vfma_laneq_f32", "arguments": [ - "uint32x4_t a", - "uint32x4_t b" + "float32x2_t a", + "float32x2_t b", + "float32x4_t v", + "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.2S" }, "b": { - "register": "Vm.16B" + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "veorq_u64", + "name": "vfma_laneq_f64", "arguments": [ - "uint64x2_t a", - "uint64x2_t b" + "float64x1_t a", + 
"float64x1_t b", + "float64x2_t v", + "const int lane" ], "return_type": { - "value": "uint64x2_t" + "value": "float64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Dd" }, "b": { - "register": "Vm.16B" + "register": "Dn" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vm.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "veorq_u8", + "name": "vfma_n_f16", "arguments": [ - "uint8x16_t a", - "uint8x16_t b" + "float16x4_t a", + "float16x4_t b", + "float16_t n" ], "return_type": { - "value": "uint8x16_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.4H " }, "b": { - "register": "Vm.16B" + "register": "Vn.4H" + }, + "n": { + "register": "Vm.H[0]" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EOR" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_f32", + "name": "vfma_n_f32", "arguments": [ "float32x2_t a", "float32x2_t b", - "const int n" + "float32_t n" ], "return_type": { "value": "float32x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.2S" }, "b": { - "register": "Vm.8B" + "register": "Vn.2S" }, "n": { - "minimum": 0, - "maximum": 1 + "register": "Vm.S[0]" } }, "Architectures": [ @@ -24003,31 +30009,30 @@ ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_f64", + "name": "vfma_n_f64", "arguments": [ "float64x1_t a", "float64x1_t b", - "const int n" + "float64_t n" ], "return_type": { "value": "float64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Da" }, "b": { - "register": "Vm.8B" + "register": "Dn" }, "n": { - "minimum": 0, - "maximum": 0 + "register": "Dm" } }, "Architectures": [ @@ -24035,234 +30040,238 @@ ], "instructions": [ [ - "EXT" + "FMADD" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_p16", + "name": "vfmad_lane_f64", "arguments": 
[ - "poly16x4_t a", - "poly16x4_t b", - "const int n" + "float64_t a", + "float64_t b", + "float64x1_t v", + "const int lane" ], "return_type": { - "value": "poly16x4_t" + "value": "float64_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Dd" }, "b": { - "register": "Vm.8B" + "register": "Dn" }, - "n": { + "lane": { "minimum": 0, - "maximum": 3 + "maximum": 0 + }, + "v": { + "register": "Vm.1D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_p64", + "name": "vfmad_laneq_f64", "arguments": [ - "poly64x1_t a", - "poly64x1_t b", - "const int n" + "float64_t a", + "float64_t b", + "float64x2_t v", + "const int lane" ], "return_type": { - "value": "poly64x1_t" + "value": "float64_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Dd" }, "b": { - "register": "Vm.8B" + "register": "Dn" }, - "n": { + "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 + }, + "v": { + "register": "Vm.2D" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_p8", + "name": "vfmah_f16", "arguments": [ - "poly8x8_t a", - "poly8x8_t b", - "const int n" + "float16_t a", + "float16_t b", + "float16_t c" ], "return_type": { - "value": "poly8x8_t" + "value": "float16_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Ha" }, "b": { - "register": "Vm.8B" + "register": "Hn" }, - "n": { - "minimum": 0, - "maximum": 7 + "c": { + "register": "Hm" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "EXT" + "FMADD" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_s16", + "name": "vfmah_lane_f16", "arguments": [ - "int16x4_t a", - "int16x4_t b", - "const int n" + "float16_t a", + "float16_t b", + "float16x4_t v", + "const int lane" ], "return_type": { - "value": "int16x4_t" + "value": "float16_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + 
"register": "Hd" }, "b": { - "register": "Vm.8B" + "register": "Hn" }, - "n": { + "lane": { "minimum": 0, "maximum": 3 + }, + "v": { + "register": "Vm.4H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_s32", + "name": "vfmah_laneq_f16", "arguments": [ - "int32x2_t a", - "int32x2_t b", - "const int n" + "float16_t a", + "float16_t b", + "float16x8_t v", + "const int lane" ], "return_type": { - "value": "int32x2_t" + "value": "float16_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Hd" }, "b": { - "register": "Vm.8B" + "register": "Hn" }, - "n": { + "lane": { "minimum": 0, - "maximum": 1 + "maximum": 7 + }, + "v": { + "register": "Vm.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_s64", + "name": "vfmaq_f16", "arguments": [ - "int64x1_t a", - "int64x1_t b", - "const int n" + "float16x8_t a", + "float16x8_t b", + "float16x8_t c" ], "return_type": { - "value": "int64x1_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.8H" }, "b": { - "register": "Vm.8B" + "register": "Vn.8H" }, - "n": { - "minimum": 0, - "maximum": 0 + "c": { + "register": "Vm.8H" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_s8", + "name": "vfmaq_f32", "arguments": [ - "int8x8_t a", - "int8x8_t b", - "const int n" + "float32x4_t a", + "float32x4_t b", + "float32x4_t c" ], "return_type": { - "value": "int8x8_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.4S" }, "b": { - "register": "Vm.8B" + "register": "Vn.4S" }, - "n": { - "minimum": 0, - "maximum": 7 + "c": { + "register": "Vm.4S" } }, "Architectures": [ @@ -24272,201 +30281,210 @@ ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - 
"name": "vext_u16", + "name": "vfmaq_f64", "arguments": [ - "uint16x4_t a", - "uint16x4_t b", - "const int n" + "float64x2_t a", + "float64x2_t b", + "float64x2_t c" ], "return_type": { - "value": "uint16x4_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.2D" }, "b": { - "register": "Vm.8B" + "register": "Vn.2D" }, - "n": { - "minimum": 0, - "maximum": 3 + "c": { + "register": "Vm.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_u32", + "name": "vfmaq_lane_f16", "arguments": [ - "uint32x2_t a", - "uint32x2_t b", - "const int n" + "float16x8_t a", + "float16x8_t b", + "float16x4_t v", + "const int lane" ], "return_type": { - "value": "uint32x2_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.8H" }, "b": { - "register": "Vm.8B" + "register": "Vn.8H" }, - "n": { + "lane": { "minimum": 0, - "maximum": 1 + "maximum": 3 + }, + "v": { + "register": "Vm.4H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_u64", + "name": "vfmaq_lane_f32", "arguments": [ - "uint64x1_t a", - "uint64x1_t b", - "const int n" + "float32x4_t a", + "float32x4_t b", + "float32x2_t v", + "const int lane" ], "return_type": { - "value": "uint64x1_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.4S" }, "b": { - "register": "Vm.8B" + "register": "Vn.4S" }, - "n": { + "lane": { "minimum": 0, - "maximum": 0 + "maximum": 1 + }, + "v": { + "register": "Vm.2S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vext_u8", + "name": "vfmaq_lane_f64", "arguments": [ - "uint8x8_t a", - "uint8x8_t b", - "const int n" + "float64x2_t a", + "float64x2_t b", + "float64x1_t v", + "const int lane" ], 
"return_type": { - "value": "uint8x8_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.8B" + "register": "Vd.2D" }, "b": { - "register": "Vm.8B" + "register": "Vn.2D" }, - "n": { + "lane": { "minimum": 0, - "maximum": 7 + "maximum": 0 + }, + "v": { + "register": "Vm.1D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_f32", + "name": "vfmaq_laneq_f16", "arguments": [ - "float32x4_t a", - "float32x4_t b", - "const int n" + "float16x8_t a", + "float16x8_t b", + "float16x8_t v", + "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.8H" }, "b": { - "register": "Vm.16B" + "register": "Vn.8H" }, - "n": { + "lane": { "minimum": 0, - "maximum": 3 + "maximum": 7 + }, + "v": { + "register": "Vm.8H" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_f64", + "name": "vfmaq_laneq_f32", "arguments": [ - "float64x2_t a", - "float64x2_t b", - "const int n" + "float32x4_t a", + "float32x4_t b", + "float32x4_t v", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.4S" }, "b": { - "register": "Vm.16B" + "register": "Vn.4S" }, - "n": { + "lane": { "minimum": 0, - "maximum": 1 + "maximum": 3 + }, + "v": { + "register": "Vm.4S" } }, "Architectures": [ @@ -24474,98 +30492,97 @@ ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_p16", + "name": "vfmaq_laneq_f64", "arguments": [ - "poly16x8_t a", - "poly16x8_t b", - "const int n" + "float64x2_t a", + "float64x2_t b", + "float64x2_t v", + "const int lane" ], "return_type": { - "value": "poly16x8_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + 
"register": "Vd.2D" }, "b": { - "register": "Vm.16B" + "register": "Vn.2D" }, - "n": { + "lane": { "minimum": 0, - "maximum": 7 + "maximum": 1 + }, + "v": { + "register": "Vm.2D" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_p64", + "name": "vfmaq_n_f16", "arguments": [ - "poly64x2_t a", - "poly64x2_t b", - "const int n" + "float16x8_t a", + "float16x8_t b", + "float16_t n" ], "return_type": { - "value": "poly64x2_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.8H " }, "b": { - "register": "Vm.16B" + "register": "Vn.8H" }, "n": { - "minimum": 0, - "maximum": 1 + "register": "Vm.H[0]" } }, "Architectures": [ - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_p8", + "name": "vfmaq_n_f32", "arguments": [ - "poly8x16_t a", - "poly8x16_t b", - "const int n" + "float32x4_t a", + "float32x4_t b", + "float32_t n" ], "return_type": { - "value": "poly8x16_t" + "value": "float32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.4S" }, "b": { - "register": "Vm.16B" + "register": "Vn.4S" }, "n": { - "minimum": 0, - "maximum": 15 + "register": "Vm.S[0]" } }, "Architectures": [ @@ -24575,967 +30592,894 @@ ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_s16", + "name": "vfmaq_n_f64", "arguments": [ - "int16x8_t a", - "int16x8_t b", - "const int n" + "float64x2_t a", + "float64x2_t b", + "float64_t n" ], "return_type": { - "value": "int16x8_t" + "value": "float64x2_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Vd.2D" }, "b": { - "register": "Vm.16B" + "register": "Vn.2D" }, "n": { - "minimum": 0, - "maximum": 7 + "register": "Vm.D[0]" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_s32", + "name": 
"vfmas_lane_f32", "arguments": [ - "int32x4_t a", - "int32x4_t b", - "const int n" + "float32_t a", + "float32_t b", + "float32x2_t v", + "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "float32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Sd" }, "b": { - "register": "Vm.16B" + "register": "Sn" }, - "n": { + "lane": { "minimum": 0, - "maximum": 3 + "maximum": 1 + }, + "v": { + "register": "Vm.2S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_s64", + "name": "vfmas_laneq_f32", "arguments": [ - "int64x2_t a", - "int64x2_t b", - "const int n" + "float32_t a", + "float32_t b", + "float32x4_t v", + "const int lane" ], "return_type": { - "value": "int64x2_t" + "value": "float32_t" }, "Arguments_Preparation": { "a": { - "register": "Vn.16B" + "register": "Sd" }, "b": { - "register": "Vm.16B" + "register": "Sn" }, - "n": { + "lane": { "minimum": 0, - "maximum": 1 + "maximum": 3 + }, + "v": { + "register": "Vm.4S" } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_s8", + "name": "vfmlal_high_f16", "arguments": [ - "int8x16_t a", - "int8x16_t b", - "const int n" + "float32x2_t r", + "float16x4_t a", + "float16x4_t b" ], "return_type": { - "value": "int8x16_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.16B" - }, - "n": { - "minimum": 0, - "maximum": 15 + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLAL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_u16", + "name": "vfmlal_lane_high_f16", "arguments": [ - "uint16x8_t a", - "uint16x8_t b", - "const int n" + "float32x2_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" ], "return_type": { - "value": "uint16x8_t" + "value": 
"float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.16B" - }, - "n": { + "a": {}, + "b": {}, + "lane": { "minimum": 0, - "maximum": 7 + "maximum": 3 + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLAL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_u32", + "name": "vfmlal_lane_low_f16", "arguments": [ - "uint32x4_t a", - "uint32x4_t b", - "const int n" + "float32x2_t r", + "float16x4_t a", + "float16x4_t b", + "const int lane" ], "return_type": { - "value": "uint32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.16B" - }, - "n": { + "a": {}, + "b": {}, + "lane": { "minimum": 0, "maximum": 3 + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLAL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_u64", + "name": "vfmlal_laneq_high_f16", "arguments": [ - "uint64x2_t a", - "uint64x2_t b", - "const int n" + "float32x2_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "uint64x2_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.16B" - }, - "n": { + "a": {}, + "b": {}, + "lane": { "minimum": 0, - "maximum": 1 + "maximum": 7 + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLAL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vextq_u8", + "name": "vfmlal_laneq_low_f16", "arguments": [ - "uint8x16_t a", - "uint8x16_t b", - "const int n" + "float32x2_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "uint8x16_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.16B" - }, - "n": { + "a": {}, + "b": {}, + "lane": { "minimum": 0, - 
"maximum": 15 + "maximum": 7 + }, + "r": { + "register": "Vd.2S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "EXT" + "FMLAL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfma_f32", + "name": "vfmlal_low_f16", "arguments": [ - "float32x2_t a", - "float32x2_t b", - "float32x2_t c" + "float32x2_t r", + "float16x4_t a", + "float16x4_t b" ], "return_type": { "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { + "a": {}, + "b": {}, + "r": { "register": "Vd.2S" - }, - "b": { - "register": "Vn.2S" - }, - "c": { - "register": "Vm.2S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLAL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfma_f64", + "name": "vfmlalq_high_f16", "arguments": [ - "float64x1_t a", - "float64x1_t b", - "float64x1_t c" + "float32x4_t r", + "float16x8_t a", + "float16x8_t b" ], "return_type": { - "value": "float64x1_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Da" - }, - "b": { - "register": "Dn" - }, - "c": { - "register": "Dm" + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMADD" + "FMLAL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfma_lane_f32", + "name": "vfmlalq_lane_high_f16", "arguments": [ - "float32x2_t a", - "float32x2_t b", - "float32x2_t v", + "float32x4_t r", + "float16x8_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.2S" - }, - "b": { - "register": "Vn.2S" - }, + "a": {}, + "b": {}, "lane": { "minimum": 0, - "maximum": 1 + "maximum": 3 }, - "v": { - "register": "Vm.2S" + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLAL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfma_lane_f64", + "name": "vfmlalq_lane_low_f16", "arguments": [ - "float64x1_t a", - "float64x1_t b", - "float64x1_t v", + "float32x4_t r", 
+ "float16x8_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float64x1_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Dd" - }, - "b": { - "register": "Dn" - }, + "a": {}, + "b": {}, "lane": { "minimum": 0, - "maximum": 0 + "maximum": 3 }, - "v": { - "register": "Vm.1D" + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLAL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfma_laneq_f32", + "name": "vfmlalq_laneq_high_f16", "arguments": [ - "float32x2_t a", - "float32x2_t b", - "float32x4_t v", + "float32x4_t r", + "float16x8_t a", + "float16x8_t b", "const int lane" ], "return_type": { - "value": "float32x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.2S" - }, - "b": { - "register": "Vn.2S" - }, + "a": {}, + "b": {}, "lane": { "minimum": 0, - "maximum": 3 + "maximum": 7 }, - "v": { - "register": "Vm.4S" + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLAL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfma_laneq_f64", + "name": "vfmlalq_laneq_low_f16", "arguments": [ - "float64x1_t a", - "float64x1_t b", - "float64x2_t v", + "float32x4_t r", + "float16x8_t a", + "float16x8_t b", "const int lane" ], "return_type": { - "value": "float64x1_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Dd" - }, - "b": { - "register": "Dn" - }, + "a": {}, + "b": {}, "lane": { "minimum": 0, - "maximum": 1 + "maximum": 7 }, - "v": { - "register": "Vm.2D" + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLAL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfma_n_f32", + "name": "vfmlalq_low_f16", "arguments": [ - "float32x2_t a", - "float32x2_t b", - "float32_t n" + "float32x4_t r", + "float16x8_t a", + "float16x8_t b" ], "return_type": { - "value": "float32x2_t" + "value": "float32x4_t" }, 
"Arguments_Preparation": { - "a": { - "register": "Vd.2S" - }, - "b": { - "register": "Vn.2S" - }, - "n": { - "register": "Vm.S[0]" + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLAL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfma_n_f64", + "name": "vfmlsl_high_f16", "arguments": [ - "float64x1_t a", - "float64x1_t b", - "float64_t n" + "float32x2_t r", + "float16x4_t a", + "float16x4_t b" ], "return_type": { - "value": "float64x1_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Da" - }, - "b": { - "register": "Dn" - }, - "n": { - "register": "Dm" + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMADD" + "FMLSL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmad_lane_f64", + "name": "vfmlsl_lane_high_f16", "arguments": [ - "float64_t a", - "float64_t b", - "float64x1_t v", + "float32x2_t r", + "float16x4_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float64_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Dd" - }, - "b": { - "register": "Dn" - }, + "a": {}, + "b": {}, "lane": { "minimum": 0, - "maximum": 0 + "maximum": 3 }, - "v": { - "register": "Vm.1D" + "r": { + "register": "Vd.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmad_laneq_f64", + "name": "vfmlsl_lane_low_f16", "arguments": [ - "float64_t a", - "float64_t b", - "float64x2_t v", + "float32x2_t r", + "float16x4_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float64_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Dd" - }, - "b": { - "register": "Dn" - }, + "a": {}, + "b": {}, "lane": { "minimum": 0, - "maximum": 1 + "maximum": 3 }, - "v": { - "register": "Vm.2D" + "r": { + "register": "Vd.2S" } }, "Architectures": [ + "A32", 
"A64" ], "instructions": [ [ - "FMLA" + "FMLSL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmaq_f32", + "name": "vfmlsl_laneq_high_f16", "arguments": [ - "float32x4_t a", - "float32x4_t b", - "float32x4_t c" + "float32x2_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "float32x4_t" + "value": "float32x2_t" }, - "Arguments_Preparation": { - "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.4S" + "Arguments_Preparation": { + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 }, - "c": { - "register": "Vm.4S" + "r": { + "register": "Vd.2S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmaq_f64", + "name": "vfmlsl_laneq_low_f16", "arguments": [ - "float64x2_t a", - "float64x2_t b", - "float64x2_t c" + "float32x2_t r", + "float16x4_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.2D" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 }, - "c": { - "register": "Vm.2D" + "r": { + "register": "Vd.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmaq_lane_f32", + "name": "vfmlsl_low_f16", "arguments": [ - "float32x4_t a", - "float32x4_t b", - "float32x2_t v", - "const int lane" + "float32x2_t r", + "float16x4_t a", + "float16x4_t b" ], "return_type": { - "value": "float32x4_t" + "value": "float32x2_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.4S" - }, - "lane": { - "minimum": 0, - "maximum": 1 - }, - "v": { - "register": "Vm.2S" + "a": {}, + "b": {}, + "r": { + "register": "Vd.2S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmaq_lane_f64", + "name": 
"vfmlslq_high_f16", "arguments": [ - "float64x2_t a", - "float64x2_t b", - "float64x1_t v", - "const int lane" + "float32x4_t r", + "float16x8_t a", + "float16x8_t b" ], "return_type": { - "value": "float64x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.2D" - }, - "lane": { - "minimum": 0, - "maximum": 0 - }, - "v": { - "register": "Vm.1D" + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmaq_laneq_f32", + "name": "vfmlslq_lane_high_f16", "arguments": [ - "float32x4_t a", - "float32x4_t b", - "float32x4_t v", + "float32x4_t r", + "float16x8_t a", + "float16x4_t b", "const int lane" ], "return_type": { "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.4S" - }, + "a": {}, + "b": {}, "lane": { "minimum": 0, "maximum": 3 }, - "v": { - "register": "Vm.4S" + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmaq_laneq_f64", + "name": "vfmlslq_lane_low_f16", "arguments": [ - "float64x2_t a", - "float64x2_t b", - "float64x2_t v", + "float32x4_t r", + "float16x8_t a", + "float16x4_t b", "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.2D" - }, + "a": {}, + "b": {}, "lane": { "minimum": 0, - "maximum": 1 + "maximum": 3 }, - "v": { - "register": "Vm.2D" + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmaq_n_f32", + "name": "vfmlslq_laneq_high_f16", "arguments": [ - "float32x4_t a", - "float32x4_t b", - "float32_t n" + "float32x4_t r", + "float16x8_t a", + "float16x8_t b", + "const 
int lane" ], "return_type": { "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.4S" - }, - "b": { - "register": "Vn.4S" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 }, - "n": { - "register": "Vm.S[0]" + "r": { + "register": "Vd.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL2" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmaq_n_f64", + "name": "vfmlslq_laneq_low_f16", "arguments": [ - "float64x2_t a", - "float64x2_t b", - "float64_t n" + "float32x4_t r", + "float16x8_t a", + "float16x8_t b", + "const int lane" ], "return_type": { - "value": "float64x2_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vd.2D" - }, - "b": { - "register": "Vn.2D" + "a": {}, + "b": {}, + "lane": { + "minimum": 0, + "maximum": 7 }, - "n": { - "register": "Vm.D[0]" + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmas_lane_f32", + "name": "vfmlslq_low_f16", "arguments": [ - "float32_t a", - "float32_t b", - "float32x2_t v", - "const int lane" + "float32x4_t r", + "float16x8_t a", + "float16x8_t b" ], "return_type": { - "value": "float32_t" + "value": "float32x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Sd" - }, - "b": { - "register": "Sn" - }, - "lane": { - "minimum": 0, - "maximum": 1 - }, - "v": { - "register": "Vm.2S" + "a": {}, + "b": {}, + "r": { + "register": "Vd.4S" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLSL" ] ] }, { "SIMD_ISA": "Neon", - "name": "vfmas_laneq_f32", + "name": "vfms_f16", "arguments": [ - "float32_t a", - "float32_t b", - "float32x4_t v", - "const int lane" + "float16x4_t a", + "float16x4_t b", + "float16x4_t c" ], "return_type": { - "value": "float32_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": { - "register": "Sd" + "register": "Vd.4H" }, "b": { - "register": "Sn" - }, - 
"lane": { - "minimum": 0, - "maximum": 3 + "register": "Vn.4H" }, - "v": { - "register": "Vm.4S" + "c": { + "register": "Vm.4H" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "FMLA" + "FMLS" ] ] }, @@ -25603,6 +31547,42 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vfms_lane_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vfms_lane_f32", @@ -25675,6 +31655,42 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vfms_laneq_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + }, + "b": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vfms_laneq_f32", @@ -25747,6 +31763,37 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vfms_n_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b", + "float16_t n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H " + }, + "b": { + "register": "Vn.4H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vfms_n_f32", @@ -25881,6 +31928,142 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vfmsh_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16_t c" + ], + "return_type": { + "value": "float16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Ha" + }, + "b": { + "register": "Hn" + }, + "c": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMSUB" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsh_lane_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsh_laneq_f16", + "arguments": [ + "float16_t a", + "float16_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hd" + }, + "b": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16x8_t c" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vfmsq_f32", @@ -25945,6 +32128,42 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_lane_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" 
+ } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vfmsq_lane_f32", @@ -26017,6 +32236,42 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_laneq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vfmsq_laneq_f32", @@ -26089,6 +32344,37 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vfmsq_n_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b", + "float16_t n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H " + }, + "b": { + "register": "Vn.8H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMLS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vfmsq_n_f32", @@ -26223,6 +32509,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vget_high_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vget_high_f32", @@ -26545,6 +32856,36 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vget_lane_f16", + "arguments": [ + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": 
"vget_lane_f32", @@ -26932,6 +33273,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vget_low_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vget_low_f32", @@ -27254,6 +33620,36 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vgetq_lane_f16", + "arguments": [ + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vgetq_lane_f32", @@ -28337,6 +34733,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld1_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld1_dup_f32", @@ -28659,6 +35080,106 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f16_x2", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f16_x3", + "arguments": [ + "float16_t const * ptr" + 
], + "return_type": { + "value": "float16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1_f16_x4", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld1_f32", @@ -28851,6 +35372,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld1_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x4_t src", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld1_lane_f32", @@ -30386,6 +36941,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1R" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld1q_dup_f32", @@ -30708,6 +37288,106 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f16_x2", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x2_t" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f16_x3", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_f16_x4", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld1q_f32", @@ -30900,6 +37580,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld1q_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x8_t src", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld1q_lane_f32", @@ -32435,6 +39149,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld2_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld2_dup_f32", @@ -32757,6 +39496,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld2_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld2_f32", @@ -32805,6 +39569,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld2_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x4x2_t src", + "const int lane" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld2_lane_f32", @@ -33513,6 +40311,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2R" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld2q_dup_f32", @@ -33830,6 +40653,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld2q_f32", @@ -33878,6 +40726,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld2q_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x8x2_t src", + "const int lane" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld2q_lane_f32", @@ -34575,6 +41457,31 @@ ] ] 
}, + { + "SIMD_ISA": "Neon", + "name": "vld3_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld3_dup_f32", @@ -34897,6 +41804,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld3_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld3_f32", @@ -34945,6 +41877,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld3_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x4x3_t src", + "const int lane" + ], + "return_type": { + "value": "float16x4x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld3_lane_f32", @@ -35653,6 +42619,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3R" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld3q_dup_f32", @@ -35970,6 +42961,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x3_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + 
"LD3" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld3q_f32", @@ -36018,6 +43034,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld3q_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x8x3_t src", + "const int lane" + ], + "return_type": { + "value": "float16x8x3_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD3" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld3q_lane_f32", @@ -36715,6 +43765,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld4_dup_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld4_dup_f32", @@ -37037,6 +44112,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld4_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x4x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld4_f32", @@ -37085,6 +44185,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld4_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x4x4_t src", + "const int lane" + ], + "return_type": { + "value": "float16x4x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld4_lane_f32", @@ -37793,6 +44927,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_dup_f16", + "arguments": [ + 
"float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4R" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld4q_dup_f32", @@ -38110,6 +45269,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_f16", + "arguments": [ + "float16_t const * ptr" + ], + "return_type": { + "value": "float16x8x4_t" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld4q_f32", @@ -38158,6 +45342,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vld4q_lane_f16", + "arguments": [ + "float16_t const * ptr", + "float16x8x4_t src", + "const int lane" + ], + "return_type": { + "value": "float16x8x4_t" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "src": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "LD4" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vld4q_lane_f32", @@ -38879,6 +46097,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmax_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmax_f32", @@ -39109,6 +46355,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmaxh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vmaxnm_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmaxnm_f32", @@ -39164,6 +46465,62 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXNM" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmaxnmq_f32", @@ -39219,6 +46576,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmv_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmaxnmv_f32", @@ -39242,6 +46622,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmaxnmvq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmaxnmvq_f32", @@ -39288,6 +46691,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmaxq_f16", + "arguments": 
[ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmaxq_f32", @@ -39518,6 +46949,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmaxv_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmaxv_f32", @@ -39679,6 +47133,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmaxvq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmaxvq_f32", @@ -39863,6 +47340,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmin_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmin_f32", @@ -40093,6 +47598,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vminh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnm_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vminnm_f32", @@ -40148,6 +47708,62 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vminnmh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vminnmq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINNM" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vminnmq_f32", @@ -40203,6 +47819,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vminnmv_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vminnmv_f32", @@ -40226,6 +47865,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vminnmvq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vminnmvq_f32", @@ -40272,6 +47934,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vminq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + 
}, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMIN" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vminq_f32", @@ -40502,6 +48192,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vminv_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vminv_f32", @@ -40663,6 +48376,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vminvq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vminvq_f32", @@ -46338,113 +54074,145 @@ }, { "SIMD_ISA": "Neon", - "name": "vmlsq_u16", + "name": "vmlsq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t b", + "uint16x8_t c" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + }, + "b": { + "register": "Vn.8H" + }, + "c": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_u32", + "arguments": [ + "uint32x4_t a", + "uint32x4_t b", + "uint32x4_t c" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + }, + "b": { + "register": "Vn.4S" + }, + "c": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmlsq_u8", + "arguments": [ + "uint8x16_t a", + "uint8x16_t b", + "uint8x16_t c" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" 
+ }, + "b": { + "register": "Vn.16B" + }, + "c": { + "register": "Vm.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MLS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmmlaq_s32", "arguments": [ - "uint16x8_t a", - "uint16x8_t b", - "uint16x8_t c" + "int32x4_t r", + "int8x16_t a", + "int8x16_t b" ], "return_type": { - "value": "uint16x8_t" + "value": "int32x4_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8H" + "register": "Vn.16B" }, "b": { - "register": "Vn.8H" + "register": "Vm.16B" }, - "c": { - "register": "Vm.8H" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "MLS" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vmlsq_u32", - "arguments": [ - "uint32x4_t a", - "uint32x4_t b", - "uint32x4_t c" - ], - "return_type": { - "value": "uint32x4_t" - }, - "Arguments_Preparation": { - "a": { + "r": { "register": "Vd.4S" - }, - "b": { - "register": "Vn.4S" - }, - "c": { - "register": "Vm.4S" } }, "Architectures": [ - "v7", "A32", "A64" ], "instructions": [ [ - "MLS" + "SMMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vmlsq_u8", + "name": "vmmlaq_u32", "arguments": [ + "uint32x4_t r", "uint8x16_t a", - "uint8x16_t b", - "uint8x16_t c" - ], - "return_type": { - "value": "uint8x16_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vd.16B" - }, - "b": { - "register": "Vn.16B" - }, - "c": { - "register": "Vm.16B" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "MLS" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vmmlaq_s32", - "arguments": [ - "int32x4_t r", - "int8x16_t a", - "int8x16_t b" + "uint8x16_t b" ], "return_type": { - "value": "int32x4_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { @@ -46463,39 +54231,32 @@ ], "instructions": [ [ - "SMMLA" + "UMMLA" ] ] }, { "SIMD_ISA": "Neon", - "name": "vmmlaq_u32", + "name": "vmov_n_f16", "arguments": [ - "uint32x4_t r", - "uint8x16_t a", - "uint8x16_t b" + 
"float16_t value" ], "return_type": { - "value": "uint32x4_t" + "value": "float16x4_t" }, "Arguments_Preparation": { - "a": { - "register": "Vn.16B" - }, - "b": { - "register": "Vm.16B" - }, - "r": { - "register": "Vd.4S" + "value": { + "register": "rn" } }, "Architectures": [ + "v7", "A32", "A64" ], "instructions": [ [ - "UMMLA" + "DUP" ] ] }, @@ -47409,6 +55170,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmovq_n_f16", + "arguments": [ + "float16_t value" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "value": { + "register": "rn" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "DUP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmovq_n_f32", @@ -47707,6 +55493,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmul_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmul_f32", @@ -47763,6 +55577,39 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmul_lane_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmul_lane_f32", @@ -47965,6 +55812,38 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_f16", + "arguments": [ + "float16x4_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + 
"register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmul_laneq_f32", @@ -48095,65 +55974,93 @@ }, { "SIMD_ISA": "Neon", - "name": "vmul_laneq_u16", + "name": "vmul_laneq_u16", + "arguments": [ + "uint16x4_t a", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_laneq_u32", + "arguments": [ + "uint32x2_t a", + "uint32x4_t v", + "const int lane" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.2S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmul_n_f16", "arguments": [ - "uint16x4_t a", - "uint16x8_t v", - "const int lane" + "float16x4_t a", + "float16_t n" ], "return_type": { - "value": "uint16x4_t" + "value": "float16x4_t" }, "Arguments_Preparation": { "a": { "register": "Vn.4H" }, - "lane": { - "minimum": 0, - "maximum": 7 - }, - "v": { - "register": "Vm.8H" - } - }, - "Architectures": [ - "A64" - ], - "instructions": [ - [ - "MUL" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vmul_laneq_u32", - "arguments": [ - "uint32x2_t a", - "uint32x4_t v", - "const int lane" - ], - "return_type": { - "value": "uint32x2_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vn.2S" - }, - "lane": { - "minimum": 0, - "maximum": 3 - }, - "v": { - "register": "Vm.4S" + "n": { + "register": "Vm.H[0]" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "MUL" + "FMUL" ] ] }, @@ -48596,6 +56503,98 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": 
"vmulh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulh_lane_f16", + "arguments": [ + "float16_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulh_laneq_f16", + "arguments": [ + "float16_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmull_high_lane_s16", @@ -49788,6 +57787,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulq_f32", @@ -49844,6 +57871,39 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_lane_f16", + "arguments": [ + "float16x8_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + 
"A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulq_lane_f32", @@ -50046,6 +58106,38 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMUL" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulq_laneq_f32", @@ -50144,14 +58236,78 @@ }, { "SIMD_ISA": "Neon", - "name": "vmulq_laneq_s32", + "name": "vmulq_laneq_s32", + "arguments": [ + "int32x4_t a", + "int32x4_t v", + "const int lane" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4S" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4S" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_u16", + "arguments": [ + "uint16x8_t a", + "uint16x8_t v", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MUL" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulq_laneq_u32", "arguments": [ - "int32x4_t a", - "int32x4_t v", + "uint32x4_t a", + "uint32x4_t v", "const int lane" ], "return_type": { - "value": "int32x4_t" + "value": "uint32x4_t" }, "Arguments_Preparation": { "a": { @@ -50176,65 +58332,29 @@ }, { "SIMD_ISA": "Neon", - "name": "vmulq_laneq_u16", + "name": "vmulq_n_f16", "arguments": [ - "uint16x8_t a", - "uint16x8_t v", - "const int lane" + "float16x8_t a", + "float16_t n" ], "return_type": { - "value": "uint16x8_t" + 
"value": "float16x8_t" }, "Arguments_Preparation": { "a": { "register": "Vn.8H" }, - "lane": { - "minimum": 0, - "maximum": 7 - }, - "v": { - "register": "Vm.8H" - } - }, - "Architectures": [ - "A64" - ], - "instructions": [ - [ - "MUL" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vmulq_laneq_u32", - "arguments": [ - "uint32x4_t a", - "uint32x4_t v", - "const int lane" - ], - "return_type": { - "value": "uint32x4_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vn.4S" - }, - "lane": { - "minimum": 0, - "maximum": 3 - }, - "v": { - "register": "Vm.4S" + "n": { + "register": "Vm.H[0]" } }, "Architectures": [ + "A32", "A64" ], "instructions": [ [ - "MUL" + "FMUL" ] ] }, @@ -50677,6 +58797,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulx_f32", @@ -50731,6 +58878,38 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_lane_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulx_lane_f32", @@ -50795,6 +58974,38 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_laneq_f16", + "arguments": [ + "float16x4_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + 
"A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulx_laneq_f32", @@ -50859,6 +59070,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulx_n_f16", + "arguments": [ + "float16x4_t a", + "float16_t n" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulxd_f64", @@ -50950,6 +59188,124 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulxh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxh_lane_f16", + "arguments": [ + "float16_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxh_laneq_f16", + "arguments": [ + "float16_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + 
"A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulxq_f32", @@ -51004,6 +59360,38 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_lane_f16", + "arguments": [ + "float16x8_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulxq_lane_f32", @@ -51068,6 +59456,38 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_laneq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t v", + "const int lane" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulxq_laneq_f32", @@ -51132,6 +59552,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vmulxq_n_f16", + "arguments": [ + "float16x8_t a", + "float16_t n" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "n": { + "register": "Vm.H[0]" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMULX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vmulxs_f32", @@ -51573,6 +60020,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vneg_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vneg_f32", @@ -51742,6 +60213,54 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vnegh_f16", + "arguments": [ + 
"float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vnegq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FNEG" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vnegq_f32", @@ -53164,6 +61683,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpadd_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpadd_f32", @@ -53736,6 +62283,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpaddq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FADDP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpaddq_f32", @@ -54029,6 +62603,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpmax_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpmax_f32", @@ -54232,6 +62834,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnm_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + 
"value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpmaxnm_f32", @@ -54259,6 +62888,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxnmq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXNMP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpmaxnmq_f32", @@ -54359,6 +63015,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpmaxq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMAXP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpmaxq_f32", @@ -54621,6 +63304,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpmin_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpmin_f32", @@ -54824,6 +63535,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpminnm_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpminnm_f32", @@ -54851,6 +63589,33 @@ ] ] }, + { + 
"SIMD_ISA": "Neon", + "name": "vpminnmq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINNMP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpminnmq_f32", @@ -54951,6 +63716,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vpminq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FMINP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vpminq_f32", @@ -68541,6 +77333,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrecpe_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrecpe_f32", @@ -68637,6 +77453,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrecpeh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpeq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrecpeq_f32", @@ -68733,6 +77596,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrecps_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + 
"return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrecps_f32", @@ -68816,6 +77707,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrecpsh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrecpsq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRECPS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrecpsq_f32", @@ -68922,6 +77868,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrecpxh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRECPX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrecpxs_f32", @@ -68945,6 +77914,353 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_f32", + "arguments": [ + "float32x2_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_f64", + "arguments": [ + "float64x1_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ 
+ "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_s64", + "arguments": [ + "int64x1_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_s8", + "arguments": [ 
+ "int8x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_u16", + "arguments": [ + "uint16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_u32", + "arguments": [ + "uint32x2_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_u64", + "arguments": [ + "uint64x1_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f16_u8", + "arguments": [ + "uint8x8_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f32_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_f32_f64", @@ -69218,6 +78534,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_f64_f16", + "arguments": [ + "float16x4_t a" + ], + 
"return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_f64_f32", @@ -69494,6 +78833,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_p16_f32", @@ -69791,6 +79155,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p64_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_p64_f32", @@ -70054,6 +79442,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_p8_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_p8_f32", @@ -70351,6 +79764,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_s16_f32", @@ -70648,6 +80086,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s32_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": 
"int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_s32_f32", @@ -70945,6 +80408,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s64_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_s64_f32", @@ -71242,6 +80730,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_s8_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "int8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_s8_f32", @@ -71539,6 +81052,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u16_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_u16_f32", @@ -71836,6 +81374,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpret_u32_f32", @@ -71863,9 +81426,156 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_f64", + "name": "vreinterpret_u32_f64", + "arguments": [ + "float64x1_t a" + ], + 
"return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_p64", + "arguments": [ + "poly64x1_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_s16", + "arguments": [ + "int16x4_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_s32", + "arguments": [ + "int32x2_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_s64", "arguments": [ - "float64x1_t a" + "int64x1_t a" ], "return_type": { "value": "uint32x2_t" @@ -71876,6 +81586,8 @@ } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -71886,9 
+81598,34 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_p16", + "name": "vreinterpret_u32_s8", "arguments": [ - "poly16x4_t a" + "int8x8_t a" + ], + "return_type": { + "value": "uint32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u32_u16", + "arguments": [ + "uint16x4_t a" ], "return_type": { "value": "uint32x2_t" @@ -71911,9 +81648,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_p64", + "name": "vreinterpret_u32_u64", "arguments": [ - "poly64x1_t a" + "uint64x1_t a" ], "return_type": { "value": "uint32x2_t" @@ -71924,6 +81661,7 @@ } }, "Architectures": [ + "v7", "A32", "A64" ], @@ -71935,9 +81673,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_p8", + "name": "vreinterpret_u32_u8", "arguments": [ - "poly8x8_t a" + "uint8x8_t a" ], "return_type": { "value": "uint32x2_t" @@ -71960,12 +81698,12 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_s16", + "name": "vreinterpret_u64_f16", "arguments": [ - "int16x4_t a" + "float16x4_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { @@ -71985,12 +81723,12 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_s32", + "name": "vreinterpret_u64_f32", "arguments": [ - "int32x2_t a" + "float32x2_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { @@ -72010,12 +81748,12 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_s64", + "name": "vreinterpret_u64_f64", "arguments": [ - "int64x1_t a" + "float64x1_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { @@ -72023,8 +81761,6 @@ } }, "Architectures": [ - "v7", - "A32", "A64" ], "instructions": [ @@ -72035,16 +81771,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_s8", + "name": 
"vreinterpret_u64_p16", "arguments": [ - "int8x8_t a" + "poly16x4_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" + "register": "Vd.4H" } }, "Architectures": [ @@ -72060,20 +81796,19 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_u16", + "name": "vreinterpret_u64_p64", "arguments": [ - "uint16x4_t a" + "poly64x1_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4H" + "register": "Vd.1D" } }, "Architectures": [ - "v7", "A32", "A64" ], @@ -72085,16 +81820,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_u64", + "name": "vreinterpret_u64_p8", "arguments": [ - "uint64x1_t a" + "poly8x8_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.1D" + "register": "Vd.8B" } }, "Architectures": [ @@ -72110,16 +81845,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u32_u8", + "name": "vreinterpret_u64_s16", "arguments": [ - "uint8x8_t a" + "int16x4_t a" ], "return_type": { - "value": "uint32x2_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" + "register": "Vd.4H" } }, "Architectures": [ @@ -72135,9 +81870,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_f32", + "name": "vreinterpret_u64_s32", "arguments": [ - "float32x2_t a" + "int32x2_t a" ], "return_type": { "value": "uint64x1_t" @@ -72160,9 +81895,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_f64", + "name": "vreinterpret_u64_s64", "arguments": [ - "float64x1_t a" + "int64x1_t a" ], "return_type": { "value": "uint64x1_t" @@ -72173,6 +81908,8 @@ } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -72183,9 +81920,34 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_p16", + "name": "vreinterpret_u64_s8", "arguments": [ - "poly16x4_t a" + "int8x8_t a" + ], + "return_type": { + "value": 
"uint64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u64_u16", + "arguments": [ + "uint16x4_t a" ], "return_type": { "value": "uint64x1_t" @@ -72208,19 +81970,20 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_p64", + "name": "vreinterpret_u64_u32", "arguments": [ - "poly64x1_t a" + "uint32x2_t a" ], "return_type": { "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.1D" + "register": "Vd.2S" } }, "Architectures": [ + "v7", "A32", "A64" ], @@ -72232,9 +81995,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_p8", + "name": "vreinterpret_u64_u8", "arguments": [ - "poly8x8_t a" + "uint8x8_t a" ], "return_type": { "value": "uint64x1_t" @@ -72257,12 +82020,12 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_s16", + "name": "vreinterpret_u8_f16", "arguments": [ - "int16x4_t a" + "float16x4_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { "a": { @@ -72282,12 +82045,12 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_s32", + "name": "vreinterpret_u8_f32", "arguments": [ - "int32x2_t a" + "float32x2_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { "a": { @@ -72307,18 +82070,41 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_s64", + "name": "vreinterpret_u8_f64", "arguments": [ - "int64x1_t a" + "float64x1_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { "a": { "register": "Vd.1D" } }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_p16", + "arguments": [ + "poly16x4_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + }, 
"Architectures": [ "v7", "A32", @@ -72332,12 +82118,36 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_s8", + "name": "vreinterpret_u8_p64", "arguments": [ - "int8x8_t a" + "poly64x1_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_p8", + "arguments": [ + "poly8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" }, "Arguments_Preparation": { "a": { @@ -72357,12 +82167,12 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_u16", + "name": "vreinterpret_u8_s16", "arguments": [ - "uint16x4_t a" + "int16x4_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { "a": { @@ -72382,12 +82192,12 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_u32", + "name": "vreinterpret_u8_s32", "arguments": [ - "uint32x2_t a" + "int32x2_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { "a": { @@ -72407,12 +82217,37 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u64_u8", + "name": "vreinterpret_u8_s64", "arguments": [ - "uint8x8_t a" + "int64x1_t a" ], "return_type": { - "value": "uint64x1_t" + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_s8", + "arguments": [ + "int8x8_t a" + ], + "return_type": { + "value": "uint8x8_t" }, "Arguments_Preparation": { "a": { @@ -72432,9 +82267,34 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_f32", + "name": "vreinterpret_u8_u16", "arguments": [ - "float32x2_t a" + "uint16x4_t a" + ], + "return_type": { + "value": "uint8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4H" + } + 
}, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpret_u8_u32", + "arguments": [ + "uint32x2_t a" ], "return_type": { "value": "uint8x8_t" @@ -72457,9 +82317,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_f64", + "name": "vreinterpret_u8_u64", "arguments": [ - "float64x1_t a" + "uint64x1_t a" ], "return_type": { "value": "uint8x8_t" @@ -72470,6 +82330,8 @@ } }, "Architectures": [ + "v7", + "A32", "A64" ], "instructions": [ @@ -72480,16 +82342,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_p16", + "name": "vreinterpretq_f16_f32", "arguments": [ - "poly16x4_t a" + "float32x4_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4H" + "register": "Vd.4S" } }, "Architectures": [ @@ -72505,16 +82367,39 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_p64", + "name": "vreinterpretq_f16_f64", "arguments": [ - "poly64x1_t a" + "float64x2_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.1D" + "register": "Vd.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_p128", + "arguments": [ + "poly128_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" } }, "Architectures": [ @@ -72529,16 +82414,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_p8", + "name": "vreinterpretq_f16_p16", "arguments": [ - "poly8x8_t a" + "poly16x8_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" + "register": "Vd.8H" } }, "Architectures": [ @@ -72554,16 +82439,40 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_s16", + "name": "vreinterpretq_f16_p64", "arguments": [ - "int16x4_t 
a" + "poly64x2_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4H" + "register": "Vd.2D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" } }, "Architectures": [ @@ -72579,16 +82488,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_s32", + "name": "vreinterpretq_f16_s16", "arguments": [ - "int32x2_t a" + "int16x8_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2S" + "register": "Vd.8H" } }, "Architectures": [ @@ -72604,16 +82513,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_s64", + "name": "vreinterpretq_f16_s32", "arguments": [ - "int64x1_t a" + "int32x4_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.1D" + "register": "Vd.4S" } }, "Architectures": [ @@ -72629,16 +82538,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_s8", + "name": "vreinterpretq_f16_s64", "arguments": [ - "int8x8_t a" + "int64x2_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.8B" + "register": "Vd.2D" } }, "Architectures": [ @@ -72654,16 +82563,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_u16", + "name": "vreinterpretq_f16_s8", "arguments": [ - "uint16x4_t a" + "int8x16_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.4H" + "register": "Vd.16B" } }, "Architectures": [ @@ -72679,16 +82588,16 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_u32", + "name": "vreinterpretq_f16_u16", "arguments": [ - 
"uint32x2_t a" + "uint16x8_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.2S" + "register": "Vd.8H" } }, "Architectures": [ @@ -72704,16 +82613,91 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpret_u8_u64", + "name": "vreinterpretq_f16_u32", "arguments": [ - "uint64x1_t a" + "uint32x4_t a" ], "return_type": { - "value": "uint8x8_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Vd.1D" + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f16_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f32_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" } }, "Architectures": [ @@ -73000,6 +82984,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_f64_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_f64_f32", @@ -73301,6 +83308,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_f16", + "arguments": [ + "float16x8_t 
a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_p128_f32", @@ -73327,9 +83358,128 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_f64", + "name": "vreinterpretq_p128_f64", + "arguments": [ + "float64x2_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.1Q" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_p16", + "arguments": [ + "poly16x8_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_p8", + "arguments": [ + "poly8x16_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_s16", + "arguments": [ + "int16x8_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_s32", + "arguments": [ + "int32x4_t a" + ], + "return_type": { + "value": "poly128_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p128_s64", "arguments": [ - "float64x2_t a" + "int64x2_t a" ], "return_type": { "value": "poly128_t" @@ 
-73339,29 +83489,6 @@ "register": "Vd.1Q" } }, - "Architectures": [ - "A64" - ], - "instructions": [ - [ - "NOP" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_p16", - "arguments": [ - "poly16x8_t a" - ], - "return_type": { - "value": "poly128_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vd.8H" - } - }, "Architectures": [ "A32", "A64" @@ -73374,9 +83501,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_p8", + "name": "vreinterpretq_p128_s8", "arguments": [ - "poly8x16_t a" + "int8x16_t a" ], "return_type": { "value": "poly128_t" @@ -73398,9 +83525,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_s16", + "name": "vreinterpretq_p128_u16", "arguments": [ - "int16x8_t a" + "uint16x8_t a" ], "return_type": { "value": "poly128_t" @@ -73422,9 +83549,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_s32", + "name": "vreinterpretq_p128_u32", "arguments": [ - "int32x4_t a" + "uint32x4_t a" ], "return_type": { "value": "poly128_t" @@ -73446,9 +83573,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_s64", + "name": "vreinterpretq_p128_u64", "arguments": [ - "int64x2_t a" + "uint64x2_t a" ], "return_type": { "value": "poly128_t" @@ -73470,9 +83597,9 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_s8", + "name": "vreinterpretq_p128_u8", "arguments": [ - "int8x16_t a" + "uint8x16_t a" ], "return_type": { "value": "poly128_t" @@ -73494,12 +83621,12 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_u16", + "name": "vreinterpretq_p16_f16", "arguments": [ - "uint16x8_t a" + "float16x8_t a" ], "return_type": { - "value": "poly128_t" + "value": "poly16x8_t" }, "Arguments_Preparation": { "a": { @@ -73507,78 +83634,7 @@ } }, "Architectures": [ - "A32", - "A64" - ], - "instructions": [ - [ - "NOP" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_u32", - "arguments": [ - "uint32x4_t a" - ], - "return_type": { - "value": "poly128_t" - }, - "Arguments_Preparation": { - "a": { 
- "register": "Vd.4S" - } - }, - "Architectures": [ - "A32", - "A64" - ], - "instructions": [ - [ - "NOP" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_u64", - "arguments": [ - "uint64x2_t a" - ], - "return_type": { - "value": "poly128_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vd.1Q" - } - }, - "Architectures": [ - "A32", - "A64" - ], - "instructions": [ - [ - "NOP" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vreinterpretq_p128_u8", - "arguments": [ - "uint8x16_t a" - ], - "return_type": { - "value": "poly128_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vd.16B" - } - }, - "Architectures": [ + "v7", "A32", "A64" ], @@ -73909,6 +83965,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p64_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "poly64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_p64_f32", @@ -74196,6 +84276,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_p8_f32", @@ -74419,12 +84524,112 @@ }, { "SIMD_ISA": "Neon", - "name": "vreinterpretq_p8_u16", + "name": "vreinterpretq_p8_u16", + "arguments": [ + "uint16x8_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_u32", + "arguments": [ + "uint32x4_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vd.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_u64", + "arguments": [ + "uint64x2_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.2D" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_p8_u8", + "arguments": [ + "uint8x16_t a" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s16_f16", "arguments": [ - "uint16x8_t a" + "float16x8_t a" ], "return_type": { - "value": "poly8x16_t" + "value": "int16x8_t" }, "Arguments_Preparation": { "a": { @@ -74442,81 +84647,6 @@ ] ] }, - { - "SIMD_ISA": "Neon", - "name": "vreinterpretq_p8_u32", - "arguments": [ - "uint32x4_t a" - ], - "return_type": { - "value": "poly8x16_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vd.4S" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "NOP" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vreinterpretq_p8_u64", - "arguments": [ - "uint64x2_t a" - ], - "return_type": { - "value": "poly8x16_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vd.2D" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "NOP" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vreinterpretq_p8_u8", - "arguments": [ - "uint8x16_t a" - ], - "return_type": { - "value": "poly8x16_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Vd.16B" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "NOP" - ] - ] - }, { "SIMD_ISA": "Neon", "name": 
"vreinterpretq_s16_f32", @@ -74838,6 +84968,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s32_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_s32_f32", @@ -75159,6 +85314,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s64_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_s64_f32", @@ -75480,6 +85660,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_s8_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_s8_f32", @@ -75801,6 +86006,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u16_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_u16_f32", @@ -76122,6 +86352,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u32_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint32x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": 
"vreinterpretq_u32_f32", @@ -76443,6 +86698,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u64_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint64x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_u64_f32", @@ -76764,6 +87044,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vreinterpretq_u8_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "NOP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vreinterpretq_u8_f32", @@ -77535,6 +87840,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrev64_f16", + "arguments": [ + "float16x4_t vec" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrev64_f32", @@ -77760,6 +88090,31 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrev64q_f16", + "arguments": [ + "float16x8_t vec" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "vec": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "REV64" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrev64q_f32", @@ -78701,6 +89056,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrnd_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrnd_f32", @@ -78748,6 +89127,30 @@ ] ] 
}, + { + "SIMD_ISA": "Neon", + "name": "vrnda_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrnda_f32", @@ -78795,6 +89198,54 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndah_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndaq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTA" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndaq_f32", @@ -78842,6 +89293,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndi_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndi_f32", @@ -78889,6 +89387,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndih_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vrndiq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRINTI" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndiq_f32", @@ -78936,6 +89481,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndm_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndm_f32", @@ -78983,6 +89552,54 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndmh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndmq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTM" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndmq_f32", @@ -79030,6 +89647,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndn_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndn_f32", @@ -79078,6 +89719,54 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndnh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + 
"A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndnq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTN" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndnq_f32", @@ -79150,6 +89839,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndp_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndp_f32", @@ -79197,6 +89910,54 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndph_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndpq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTP" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndpq_f32", @@ -79244,6 +90005,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTZ" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndq_f32", @@ -79291,6 +90076,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndx_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + 
"Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndx_f32", @@ -79338,6 +90147,54 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrndxh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrndxq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRINTX" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrndxq_f32", @@ -80811,6 +91668,30 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrte_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrsqrte_f32", @@ -80907,6 +91788,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrteh_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrteq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTE" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrsqrteq_f32", @@ -81003,6 +91931,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrts_f16", + 
"arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrsqrts_f32", @@ -81086,6 +92042,61 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtsh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vrsqrtsq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FRSQRTS" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vrsqrtsq_f32", @@ -82137,6 +93148,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_f16", + "arguments": [ + "float16_t a", + "float16x4_t v", + "const int lane" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "VnH" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vset_lane_f32", @@ -82173,14 +93218,215 @@ }, { "SIMD_ISA": "Neon", - "name": "vset_lane_f64", + "name": "vset_lane_f64", + "arguments": [ + "float64_t a", + "float64x1_t v", + "const int lane" + ], + "return_type": { + "value": "float64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vd.1D" + 
} + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_p16", + "arguments": [ + "poly16_t a", + "poly16x4_t v", + "const int lane" + ], + "return_type": { + "value": "poly16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_p64", + "arguments": [ + "poly64_t a", + "poly64x1_t v", + "const int lane" + ], + "return_type": { + "value": "poly64x1_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 0 + }, + "v": { + "register": "Vd.1D" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_p8", + "arguments": [ + "poly8_t a", + "poly8x8_t v", + "const int lane" + ], + "return_type": { + "value": "poly8x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 7 + }, + "v": { + "register": "Vd.8B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_s16", + "arguments": [ + "int16_t a", + "int16x4_t v", + "const int lane" + ], + "return_type": { + "value": "int16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "v": { + "register": "Vd.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_s32", + "arguments": [ + "int32_t a", + "int32x2_t v", + "const int lane" + ], + "return_type": { + "value": "int32x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Rn" + }, + 
"lane": { + "minimum": 0, + "maximum": 1 + }, + "v": { + "register": "Vd.2S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "MOV" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vset_lane_s64", "arguments": [ - "float64_t a", - "float64x1_t v", + "int64_t a", + "int64x1_t v", "const int lane" ], "return_type": { - "value": "float64x1_t" + "value": "int64x1_t" }, "Arguments_Preparation": { "a": { @@ -82194,38 +93440,6 @@ "register": "Vd.1D" } }, - "Architectures": [ - "A64" - ], - "instructions": [ - [ - "MOV" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vset_lane_p16", - "arguments": [ - "poly16_t a", - "poly16x4_t v", - "const int lane" - ], - "return_type": { - "value": "poly16x4_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Rn" - }, - "lane": { - "minimum": 0, - "maximum": 3 - }, - "v": { - "register": "Vd.4H" - } - }, "Architectures": [ "v7", "A32", @@ -82239,47 +93453,14 @@ }, { "SIMD_ISA": "Neon", - "name": "vset_lane_p64", - "arguments": [ - "poly64_t a", - "poly64x1_t v", - "const int lane" - ], - "return_type": { - "value": "poly64x1_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Rn" - }, - "lane": { - "minimum": 0, - "maximum": 0 - }, - "v": { - "register": "Vd.1D" - } - }, - "Architectures": [ - "A32", - "A64" - ], - "instructions": [ - [ - "MOV" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vset_lane_p8", + "name": "vset_lane_s8", "arguments": [ - "poly8_t a", - "poly8x8_t v", + "int8_t a", + "int8x8_t v", "const int lane" ], "return_type": { - "value": "poly8x8_t" + "value": "int8x8_t" }, "Arguments_Preparation": { "a": { @@ -82306,14 +93487,14 @@ }, { "SIMD_ISA": "Neon", - "name": "vset_lane_s16", + "name": "vset_lane_u16", "arguments": [ - "int16_t a", - "int16x4_t v", + "uint16_t a", + "uint16x4_t v", "const int lane" ], "return_type": { - "value": "int16x4_t" + "value": "uint16x4_t" }, "Arguments_Preparation": { "a": { @@ -82340,14 +93521,14 @@ }, { "SIMD_ISA": "Neon", - 
"name": "vset_lane_s32", + "name": "vset_lane_u32", "arguments": [ - "int32_t a", - "int32x2_t v", + "uint32_t a", + "uint32x2_t v", "const int lane" ], "return_type": { - "value": "int32x2_t" + "value": "uint32x2_t" }, "Arguments_Preparation": { "a": { @@ -82374,14 +93555,14 @@ }, { "SIMD_ISA": "Neon", - "name": "vset_lane_s64", + "name": "vset_lane_u64", "arguments": [ - "int64_t a", - "int64x1_t v", + "uint64_t a", + "uint64x1_t v", "const int lane" ], "return_type": { - "value": "int64x1_t" + "value": "uint64x1_t" }, "Arguments_Preparation": { "a": { @@ -82408,14 +93589,14 @@ }, { "SIMD_ISA": "Neon", - "name": "vset_lane_s8", + "name": "vset_lane_u8", "arguments": [ - "int8_t a", - "int8x8_t v", + "uint8_t a", + "uint8x8_t v", "const int lane" ], "return_type": { - "value": "int8x8_t" + "value": "uint8x8_t" }, "Arguments_Preparation": { "a": { @@ -82442,127 +93623,25 @@ }, { "SIMD_ISA": "Neon", - "name": "vset_lane_u16", - "arguments": [ - "uint16_t a", - "uint16x4_t v", - "const int lane" - ], - "return_type": { - "value": "uint16x4_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Rn" - }, - "lane": { - "minimum": 0, - "maximum": 3 - }, - "v": { - "register": "Vd.4H" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "MOV" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vset_lane_u32", + "name": "vsetq_lane_f16", "arguments": [ - "uint32_t a", - "uint32x2_t v", + "float16_t a", + "float16x8_t v", "const int lane" ], "return_type": { - "value": "uint32x2_t" + "value": "float16x8_t" }, "Arguments_Preparation": { "a": { - "register": "Rn" - }, - "lane": { - "minimum": 0, - "maximum": 1 - }, - "v": { - "register": "Vd.2S" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "MOV" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vset_lane_u64", - "arguments": [ - "uint64_t a", - "uint64x1_t v", - "const int lane" - ], - "return_type": { - "value": "uint64x1_t" - }, - 
"Arguments_Preparation": { - "a": { - "register": "Rn" - }, - "lane": { - "minimum": 0, - "maximum": 0 - }, - "v": { - "register": "Vd.1D" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "MOV" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vset_lane_u8", - "arguments": [ - "uint8_t a", - "uint8x8_t v", - "const int lane" - ], - "return_type": { - "value": "uint8x8_t" - }, - "Arguments_Preparation": { - "a": { - "register": "Rn" + "register": "VnH" }, "lane": { "minimum": 0, "maximum": 7 }, "v": { - "register": "Vd.8B" + "register": "Vd.8H" } }, "Architectures": [ @@ -84031,7 +95110,7 @@ }, "n": { "minimum": 0, - "maximum": 15 + "maximum": 16 } }, "Architectures": [ @@ -84059,7 +95138,7 @@ }, "n": { "minimum": 0, - "maximum": 31 + "maximum": 32 } }, "Architectures": [ @@ -84087,7 +95166,7 @@ }, "n": { "minimum": 0, - "maximum": 7 + "maximum": 8 } }, "Architectures": [ @@ -84115,7 +95194,7 @@ }, "n": { "minimum": 0, - "maximum": 15 + "maximum": 16 } }, "Architectures": [ @@ -84143,7 +95222,7 @@ }, "n": { "minimum": 0, - "maximum": 31 + "maximum": 32 } }, "Architectures": [ @@ -84171,7 +95250,7 @@ }, "n": { "minimum": 0, - "maximum": 7 + "maximum": 8 } }, "Architectures": [ @@ -84199,7 +95278,7 @@ }, "n": { "minimum": 0, - "maximum": 15 + "maximum": 16 } }, "Architectures": [ @@ -84229,7 +95308,7 @@ }, "n": { "minimum": 0, - "maximum": 31 + "maximum": 32 } }, "Architectures": [ @@ -84259,7 +95338,7 @@ }, "n": { "minimum": 0, - "maximum": 7 + "maximum": 8 } }, "Architectures": [ @@ -84289,7 +95368,7 @@ }, "n": { "minimum": 0, - "maximum": 15 + "maximum": 16 } }, "Architectures": [ @@ -84319,7 +95398,7 @@ }, "n": { "minimum": 0, - "maximum": 31 + "maximum": 32 } }, "Architectures": [ @@ -84349,7 +95428,7 @@ }, "n": { "minimum": 0, - "maximum": 7 + "maximum": 8 } }, "Architectures": [ @@ -87136,6 +98215,29 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vsqrt_f16", + "arguments": [ + "float16x4_t a" + ], + "return_type": { + 
"value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vsqrt_f32", @@ -87182,6 +98284,53 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vsqrth_f16", + "arguments": [ + "float16_t a" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vsqrtq_f16", + "arguments": [ + "float16x8_t a" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "FSQRT" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vsqrtq_f32", @@ -88646,6 +99795,122 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f16", + "arguments": [ + "float16_t * ptr", + "float16x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f16_x2", + "arguments": [ + "float16_t * ptr", + "float16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1_f16_x3", + "arguments": [ + "float16_t * ptr", + "float16x4x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + 
"SIMD_ISA": "Neon", + "name": "vst1_f16_x4", + "arguments": [ + "float16_t * ptr", + "float16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst1_f32", @@ -88870,6 +100135,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst1_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst1_lane_f32", @@ -90581,6 +101880,122 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f16", + "arguments": [ + "float16_t * ptr", + "float16x8_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f16_x2", + "arguments": [ + "float16_t * ptr", + "float16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f16_x3", + "arguments": [ + "float16_t * ptr", + "float16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + 
"ST1" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_f16_x4", + "arguments": [ + "float16_t * ptr", + "float16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst1q_f32", @@ -90805,6 +102220,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst1q_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x8_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst1q_lane_f32", @@ -92517,6 +103966,35 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst2_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst2_f32", @@ -92573,6 +104051,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst2_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst2_lane_f32", @@ -93325,6 +104837,35 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_f16", + "arguments": [ + "float16_t * ptr", + 
"float16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst2q_f32", @@ -93381,6 +104922,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x2_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst2q_lane_f32", @@ -93867,10 +105442,124 @@ }, { "SIMD_ISA": "Neon", - "name": "vst2q_p8", + "name": "vst2q_p8", + "arguments": [ + "poly8_t * ptr", + "poly8x16x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.16B" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_s16", + "arguments": [ + "int16_t * ptr", + "int16x8x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_s32", + "arguments": [ + "int32_t * ptr", + "int32x4x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.4S" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_s64", + 
"arguments": [ + "int64_t * ptr", + "int64x2x2_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt2.2D" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ST2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vst2q_s8", "arguments": [ - "poly8_t * ptr", - "poly8x16x2_t val" + "int8_t * ptr", + "int8x16x2_t val" ], "return_type": { "value": "void" @@ -93896,10 +105585,10 @@ }, { "SIMD_ISA": "Neon", - "name": "vst2q_s16", + "name": "vst2q_u16", "arguments": [ - "int16_t * ptr", - "int16x8x2_t val" + "uint16_t * ptr", + "uint16x8x2_t val" ], "return_type": { "value": "void" @@ -93925,10 +105614,10 @@ }, { "SIMD_ISA": "Neon", - "name": "vst2q_s32", + "name": "vst2q_u32", "arguments": [ - "int32_t * ptr", - "int32x4x2_t val" + "uint32_t * ptr", + "uint32x4x2_t val" ], "return_type": { "value": "void" @@ -93954,10 +105643,10 @@ }, { "SIMD_ISA": "Neon", - "name": "vst2q_s64", + "name": "vst2q_u64", "arguments": [ - "int64_t * ptr", - "int64x2x2_t val" + "uint64_t * ptr", + "uint64x2x2_t val" ], "return_type": { "value": "void" @@ -93981,10 +105670,10 @@ }, { "SIMD_ISA": "Neon", - "name": "vst2q_s8", + "name": "vst2q_u8", "arguments": [ - "int8_t * ptr", - "int8x16x2_t val" + "uint8_t * ptr", + "uint8x16x2_t val" ], "return_type": { "value": "void" @@ -94010,10 +105699,10 @@ }, { "SIMD_ISA": "Neon", - "name": "vst2q_u16", + "name": "vst3_f16", "arguments": [ - "uint16_t * ptr", - "uint16x8x2_t val" + "float16_t * ptr", + "float16x4x3_t val" ], "return_type": { "value": "void" @@ -94023,7 +105712,7 @@ "register": "Xn" }, "val": { - "register": "Vt2.8H" + "register": "Vt3.4H" } }, "Architectures": [ @@ -94033,16 +105722,16 @@ ], "instructions": [ [ - "ST2" + "ST3" ] ] }, { "SIMD_ISA": "Neon", - "name": "vst2q_u32", + "name": "vst3_f32", "arguments": [ - "uint32_t * ptr", - "uint32x4x2_t val" + "float32_t * ptr", + "float32x2x3_t val" ], "return_type": { "value": 
"void" @@ -94052,7 +105741,7 @@ "register": "Xn" }, "val": { - "register": "Vt2.4S" + "register": "Vt3.2S" } }, "Architectures": [ @@ -94062,16 +105751,16 @@ ], "instructions": [ [ - "ST2" + "ST3" ] ] }, { "SIMD_ISA": "Neon", - "name": "vst2q_u64", + "name": "vst3_f64", "arguments": [ - "uint64_t * ptr", - "uint64x2x2_t val" + "float64_t * ptr", + "float64x1x3_t val" ], "return_type": { "value": "void" @@ -94081,7 +105770,7 @@ "register": "Xn" }, "val": { - "register": "Vt2.2D" + "register": "Vt3.1D" } }, "Architectures": [ @@ -94089,55 +105778,31 @@ ], "instructions": [ [ - "ST2" + "ST1" ] ] }, { "SIMD_ISA": "Neon", - "name": "vst2q_u8", + "name": "vst3_lane_f16", "arguments": [ - "uint8_t * ptr", - "uint8x16x2_t val" + "float16_t * ptr", + "float16x4x3_t val", + "const int lane" ], "return_type": { "value": "void" }, "Arguments_Preparation": { - "ptr": { - "register": "Xn" + "lane": { + "minimum": 0, + "maximum": 3 }, - "val": { - "register": "Vt2.16B" - } - }, - "Architectures": [ - "v7", - "A32", - "A64" - ], - "instructions": [ - [ - "ST2" - ] - ] - }, - { - "SIMD_ISA": "Neon", - "name": "vst3_f32", - "arguments": [ - "float32_t * ptr", - "float32x2x3_t val" - ], - "return_type": { - "value": "void" - }, - "Arguments_Preparation": { "ptr": { "register": "Xn" }, "val": { - "register": "Vt3.2S" + "register": "Vt3.4H" } }, "Architectures": [ @@ -94151,33 +105816,6 @@ ] ] }, - { - "SIMD_ISA": "Neon", - "name": "vst3_f64", - "arguments": [ - "float64_t * ptr", - "float64x1x3_t val" - ], - "return_type": { - "value": "void" - }, - "Arguments_Preparation": { - "ptr": { - "register": "Xn" - }, - "val": { - "register": "Vt3.1D" - } - }, - "Architectures": [ - "A64" - ], - "instructions": [ - [ - "ST1" - ] - ] - }, { "SIMD_ISA": "Neon", "name": "vst3_lane_f32", @@ -94930,6 +106568,35 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x3_t val" + ], + "return_type": { + "value": "void" + }, + 
"Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst3q_f32", @@ -94986,6 +106653,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst3q_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x3_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt3.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST3" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst3q_lane_f32", @@ -95733,6 +107434,35 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst4_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst4_f32", @@ -95789,6 +107519,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst4_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x4x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 3 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst4_lane_f32", @@ -96541,6 +108305,35 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x4_t val" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + 
"Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst4q_f32", @@ -96597,6 +108390,40 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vst4q_lane_f16", + "arguments": [ + "float16_t * ptr", + "float16x8x4_t val", + "const int lane" + ], + "return_type": { + "value": "void" + }, + "Arguments_Preparation": { + "lane": { + "minimum": 0, + "maximum": 7 + }, + "ptr": { + "register": "Xn" + }, + "val": { + "register": "Vt4.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ST4" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vst4q_lane_f32", @@ -97366,6 +109193,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vsub_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vsub_f32", @@ -97708,6 +109563,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vsubh_f16", + "arguments": [ + "float16_t a", + "float16_t b" + ], + "return_type": { + "value": "float16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Hn" + }, + "b": { + "register": "Hm" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vsubhn_high_s16", @@ -98404,6 +110287,34 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vsubq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A32", + "A64" + ], + "instructions": [ + [ + "FSUB" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vsubq_f32", @@ -99864,6 +111775,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": 
"vtrn1_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vtrn1_f32", @@ -100107,6 +112045,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vtrn1q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vtrn1q_f32", @@ -100458,6 +112423,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vtrn2_f32", @@ -100701,6 +112693,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vtrn2q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "TRN2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vtrn2q_f32", @@ -101052,6 +113071,36 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vtrn_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + 
"TRN2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vtrn_f32", @@ -101322,6 +113371,36 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vtrnq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "TRN1", + "TRN2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vtrnq_f32", @@ -102781,6 +114860,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vuzp1_f32", @@ -103024,6 +115130,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vuzp1q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vuzp1q_f32", @@ -103375,6 +115508,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vuzp2_f32", @@ -103618,6 +115778,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vuzp2q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { 
+ "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "UZP2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vuzp2q_f32", @@ -103969,6 +116156,36 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vuzp_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vuzp_f32", @@ -104239,6 +116456,36 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vuzpq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "UZP1", + "UZP2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vuzpq_f32", @@ -104539,6 +116786,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vzip1_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vzip1_f32", @@ -104782,6 +117056,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vzip1q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP1" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vzip1q_f32", @@ -105133,6 +117434,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": 
"vzip2_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vzip2_f32", @@ -105376,6 +117704,33 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vzip2q_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "ZIP2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vzip2q_f32", @@ -105727,6 +118082,36 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vzip_f16", + "arguments": [ + "float16x4_t a", + "float16x4_t b" + ], + "return_type": { + "value": "float16x4x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.4H" + }, + "b": { + "register": "Vm.4H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vzip_f32", @@ -105997,6 +118382,36 @@ ] ] }, + { + "SIMD_ISA": "Neon", + "name": "vzipq_f16", + "arguments": [ + "float16x8_t a", + "float16x8_t b" + ], + "return_type": { + "value": "float16x8x2_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm.8H" + } + }, + "Architectures": [ + "v7", + "A32", + "A64" + ], + "instructions": [ + [ + "ZIP1", + "ZIP2" + ] + ] + }, { "SIMD_ISA": "Neon", "name": "vzipq_f32",